diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index 4e1694da80..68b4740e27 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -1713,16 +1713,27 @@ cl_kernel CL_API_CALL clCreateKernel(cl_program clProgram, break; } - auto rootDeviceIndex = pProgram->getDevices()[0]->getRootDeviceIndex(); - const KernelInfo *pKernelInfo = pProgram->getKernelInfo(kernelName, rootDeviceIndex); - if (!pKernelInfo) { + bool kernelFound = false; + KernelInfoContainer kernelInfos; + kernelInfos.resize(pProgram->getMaxRootDeviceIndex() + 1); + + for (const auto &pClDevice : pProgram->getDevices()) { + auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); + auto pKernelInfo = pProgram->getKernelInfo(kernelName, rootDeviceIndex); + if (pKernelInfo) { + kernelFound = true; + kernelInfos[rootDeviceIndex] = pKernelInfo; + } + } + + if (!kernelFound) { retVal = CL_INVALID_KERNEL_NAME; break; } kernel = Kernel::create( pProgram, - *pKernelInfo, + kernelInfos, &retVal); DBG_LOG_INPUTS("kernel", kernel); @@ -1758,13 +1769,18 @@ cl_int CL_API_CALL clCreateKernelsInProgram(cl_program clProgram, return retVal; } - auto rootDeviceIndex = pProgram->getDevices()[0]->getRootDeviceIndex(); for (unsigned int i = 0; i < numKernelsInProgram; ++i) { - const auto kernelInfo = pProgram->getKernelInfo(i, rootDeviceIndex); - DEBUG_BREAK_IF(kernelInfo == nullptr); + KernelInfoContainer kernelInfos; + kernelInfos.resize(pProgram->getMaxRootDeviceIndex() + 1); + for (const auto &pClDevice : pProgram->getDevices()) { + auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); + auto kernelInfo = pProgram->getKernelInfo(i, rootDeviceIndex); + DEBUG_BREAK_IF(kernelInfo == nullptr); + kernelInfos[rootDeviceIndex] = kernelInfo; + } kernels[i] = Kernel::create( pProgram, - *kernelInfo, + kernelInfos, nullptr); gtpinNotifyKernelCreate(kernels[i]); } @@ -5490,7 +5506,7 @@ cl_kernel CL_API_CALL clCloneKernel(cl_kernel sourceKernel, if (CL_SUCCESS == retVal) { pClonedKernel = Kernel::create(pSourceKernel->getProgram(), - pSourceKernel->getKernelInfo(), + pSourceKernel->getKernelInfos(), &retVal); UNRECOVERABLE_IF((pClonedKernel == nullptr) || (retVal != CL_SUCCESS)); diff --git a/opencl/source/built_ins/built_ins.inl b/opencl/source/built_ins/built_ins.inl index c11a6042c3..06a998fa1f 100644 --- a/opencl/source/built_ins/built_ins.inl +++ b/opencl/source/built_ins/built_ins.inl @@ -22,10 +22,10 @@ void BuiltInOp::resizeKernelInstances(size_t size) convertToAuxKernel.reserve(size); for (size_t i = convertToNonAuxKernel.size(); i < size; i++) { - auto clonedNonAuxToAuxKernel = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfo(), nullptr); + auto clonedNonAuxToAuxKernel = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfos(), nullptr); clonedNonAuxToAuxKernel->setAuxTranslationDirection(AuxTranslationDirection::NonAuxToAux); - auto clonedAuxToNonAuxKernel = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfo(), nullptr); + auto clonedAuxToNonAuxKernel = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfos(), nullptr); clonedAuxToNonAuxKernel->setAuxTranslationDirection(AuxTranslationDirection::AuxToNonAux); clonedNonAuxToAuxKernel->cloneKernel(baseKernel); diff --git a/opencl/source/built_ins/builtins_dispatch_builder.h b/opencl/source/built_ins/builtins_dispatch_builder.h index 097176e421..0af49636a5 100644 --- a/opencl/source/built_ins/builtins_dispatch_builder.h +++ b/opencl/source/built_ins/builtins_dispatch_builder.h @@ -88,10 +88,14 @@ class BuiltinDispatchInfoBuilder { protected: template void grabKernels(KernelNameT &&kernelName, Kernel *&kernelDst, KernelsDescArgsT &&... kernelsDesc) { - const KernelInfo *kernelInfo = prog->getKernelInfo(kernelName, prog->getDevices()[0]->getRootDeviceIndex()); + auto rootDeviceIndex = clDevice.getRootDeviceIndex(); + const KernelInfo *kernelInfo = prog->getKernelInfo(kernelName, rootDeviceIndex); UNRECOVERABLE_IF(nullptr == kernelInfo); cl_int err = 0; - kernelDst = Kernel::create(prog.get(), *kernelInfo, &err); + KernelInfoContainer kernelInfos; + kernelInfos.resize(rootDeviceIndex + 1); + kernelInfos[rootDeviceIndex] = kernelInfo; + kernelDst = Kernel::create(prog.get(), kernelInfos, &err); kernelDst->isBuiltIn = true; usedKernels.push_back(std::unique_ptr(kernelDst)); grabKernels(std::forward(kernelsDesc)...); diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp index f0f196c8b9..b058cb7e1b 100644 --- a/opencl/source/context/context.cpp +++ b/opencl/source/context/context.cpp @@ -399,12 +399,17 @@ SchedulerKernel &Context::getSchedulerKernel() { schedulerBuiltIn->pProgram = program; - auto kernelInfo = schedulerBuiltIn->pProgram->getKernelInfo(SchedulerKernel::schedulerName, clDevice->getRootDeviceIndex()); - DEBUG_BREAK_IF(!kernelInfo); + KernelInfoContainer kernelInfos; + kernelInfos.resize(getMaxRootDeviceIndex() + 1); + for (auto rootDeviceIndex : rootDeviceIndices) { + auto kernelInfo = schedulerBuiltIn->pProgram->getKernelInfo(SchedulerKernel::schedulerName, rootDeviceIndex); + DEBUG_BREAK_IF(!kernelInfo); + kernelInfos[rootDeviceIndex] = kernelInfo; + } schedulerBuiltIn->pKernel = Kernel::create( schedulerBuiltIn->pProgram, - *kernelInfo, + kernelInfos, &retVal); UNRECOVERABLE_IF(schedulerBuiltIn->pKernel->getScratchSize() != 0); diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 60398b377b..d8af1ad682 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -64,14 +64,16 @@ class Surface; uint32_t Kernel::dummyPatchLocation = 0xbaddf00d; -Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, bool schedulerKernel) - : slmTotalSize(kernelInfoArg.workloadInfo.slmStaticSize), - isParentKernel((kernelInfoArg.patchInfo.executionEnvironment != nullptr) ? (kernelInfoArg.patchInfo.executionEnvironment->HasDeviceEnqueue != 0) : false), +Kernel::Kernel(Program *programArg, const KernelInfoContainer &kernelInfosArg, bool schedulerKernel) + : slmTotalSize(kernelInfosArg[programArg->getDevices()[0]->getRootDeviceIndex()]->workloadInfo.slmStaticSize), + isParentKernel((kernelInfosArg[programArg->getDevices()[0]->getRootDeviceIndex()]->patchInfo.executionEnvironment != nullptr) + ? (kernelInfosArg[programArg->getDevices()[0]->getRootDeviceIndex()]->patchInfo.executionEnvironment->HasDeviceEnqueue != 0) + : false), isSchedulerKernel(schedulerKernel), executionEnvironment(programArg->getExecutionEnvironment()), program(programArg), deviceVector(programArg->getDevices()), - kernelInfo(kernelInfoArg) { + kernelInfos(kernelInfosArg) { kernelDeviceInfos.resize(program->getMaxRootDeviceIndex() + 1); program->retain(); program->retainForKernel(); @@ -98,7 +100,7 @@ Kernel::~Kernel() { } for (uint32_t i = 0; i < patchedArgumentsNum; i++) { - if (kernelInfo.kernelArgInfo.at(i).isSampler) { + if (getDefaultKernelInfo().kernelArgInfo.at(i).isSampler) { auto sampler = castToObject(kernelArguments.at(i).object); if (sampler) { sampler->decRefInternal(); @@ -168,16 +170,16 @@ template void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData cl_int Kernel::initialize() { cl_int retVal = CL_OUT_OF_HOST_MEMORY; do { - const auto &workloadInfo = kernelInfo.workloadInfo; - const auto &heapInfo = kernelInfo.heapInfo; - const auto &patchInfo = kernelInfo.patchInfo; - reconfigureKernel(); auto pClDevice = &getDevice(); auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); auto &hwInfo = pClDevice->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); - auto maxSimdSize = getKernelInfo().getMaxSimdSize(); + auto &kernelInfo = *kernelInfos[rootDeviceIndex]; + auto maxSimdSize = kernelInfo.getMaxSimdSize(); + const auto &workloadInfo = kernelInfo.workloadInfo; + const auto &heapInfo = kernelInfo.heapInfo; + const auto &patchInfo = kernelInfo.patchInfo; if (maxSimdSize != 1 && maxSimdSize < hwHelper.getMinimalSIMDSize()) { return CL_INVALID_KERNEL; @@ -443,13 +445,13 @@ cl_int Kernel::getInfo(cl_kernel_info paramName, size_t paramValueSize, switch (paramName) { case CL_KERNEL_FUNCTION_NAME: - pSrc = kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(); - srcSize = kernelInfo.kernelDescriptor.kernelMetadata.kernelName.length() + 1; + pSrc = getDefaultKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(); + srcSize = getDefaultKernelInfo().kernelDescriptor.kernelMetadata.kernelName.length() + 1; break; case CL_KERNEL_NUM_ARGS: srcSize = sizeof(cl_uint); - numArgs = (cl_uint)kernelInfo.kernelArgInfo.size(); + numArgs = (cl_uint)getDefaultKernelInfo().kernelArgInfo.size(); pSrc = &numArgs; break; @@ -472,8 +474,8 @@ cl_int Kernel::getInfo(cl_kernel_info paramName, size_t paramValueSize, break; case CL_KERNEL_ATTRIBUTES: - pSrc = kernelInfo.attributes.c_str(); - srcSize = kernelInfo.attributes.length() + 1; + pSrc = getDefaultKernelInfo().attributes.c_str(); + srcSize = getDefaultKernelInfo().attributes.length() + 1; break; case CL_KERNEL_BINARY_PROGRAM_INTEL: @@ -481,7 +483,7 @@ cl_int Kernel::getInfo(cl_kernel_info paramName, size_t paramValueSize, srcSize = getKernelHeapSize(); break; case CL_KERNEL_BINARY_GPU_ADDRESS_INTEL: - nonCannonizedGpuAddress = GmmHelper::decanonize(kernelInfo.kernelAllocation->getGpuAddress()); + nonCannonizedGpuAddress = GmmHelper::decanonize(getDefaultKernelInfo().kernelAllocation->getGpuAddress()); pSrc = &nonCannonizedGpuAddress; srcSize = sizeof(nonCannonizedGpuAddress); break; @@ -502,8 +504,8 @@ cl_int Kernel::getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t cl_int retVal; const void *pSrc = nullptr; size_t srcSize = GetInfo::invalidSourceSize; - auto numArgs = (cl_uint)kernelInfo.kernelArgInfo.size(); - const auto &argInfo = kernelInfo.kernelArgInfo[argIndx]; + auto numArgs = (cl_uint)getDefaultKernelInfo().kernelArgInfo.size(); + const auto &argInfo = getDefaultKernelInfo().kernelArgInfo[argIndx]; if (argIndx >= numArgs) { retVal = CL_INVALID_ARG_INDEX; @@ -564,6 +566,8 @@ cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info para size_t val[3]; } requiredWorkGroupSize; cl_ulong localMemorySize; + auto rootDeviceIndex = device.getRootDeviceIndex(); + auto &kernelInfo = *kernelInfos[rootDeviceIndex]; const auto &patchInfo = kernelInfo.patchInfo; size_t preferredWorkGroupSizeMultiple = 0; cl_ulong scratchSize; @@ -738,15 +742,15 @@ cl_int Kernel::getSubGroupInfo(ClDevice &clDevice, cl_kernel_sub_group_info para } const void *Kernel::getKernelHeap() const { - return kernelInfo.heapInfo.pKernelHeap; + return getDefaultKernelInfo().heapInfo.pKernelHeap; } size_t Kernel::getKernelHeapSize() const { - return kernelInfo.heapInfo.KernelHeapSize; + return getDefaultKernelInfo().heapInfo.KernelHeapSize; } void Kernel::substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize) { - KernelInfo *pKernelInfo = const_cast(&kernelInfo); + KernelInfo *pKernelInfo = const_cast(&getDefaultKernelInfo()); void **pKernelHeap = const_cast(&pKernelInfo->heapInfo.pKernelHeap); *pKernelHeap = newKernelHeap; auto &heapInfo = pKernelInfo->heapInfo; @@ -767,15 +771,15 @@ void Kernel::substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize) } bool Kernel::isKernelHeapSubstituted() const { - return kernelInfo.isKernelHeapSubstituted; + return getDefaultKernelInfo().isKernelHeapSubstituted; } uint64_t Kernel::getKernelId() const { - return kernelInfo.kernelId; + return getDefaultKernelInfo().kernelId; } void Kernel::setKernelId(uint64_t newKernelId) { - KernelInfo *pKernelInfo = const_cast(&kernelInfo); + KernelInfo *pKernelInfo = const_cast(&getDefaultKernelInfo()); pKernelInfo->kernelId = newKernelId; } uint32_t Kernel::getStartOffset() const { @@ -786,19 +790,19 @@ void Kernel::setStartOffset(uint32_t offset) { } void *Kernel::getSurfaceStateHeap(uint32_t rootDeviceIndex) const { - return kernelInfo.usesSsh ? kernelDeviceInfos[rootDeviceIndex].pSshLocal.get() : nullptr; + return kernelInfos[rootDeviceIndex]->usesSsh ? kernelDeviceInfos[rootDeviceIndex].pSshLocal.get() : nullptr; } size_t Kernel::getDynamicStateHeapSize() const { - return kernelInfo.heapInfo.DynamicStateHeapSize; + return getDefaultKernelInfo().heapInfo.DynamicStateHeapSize; } const void *Kernel::getDynamicStateHeap() const { - return kernelInfo.heapInfo.pDsh; + return getDefaultKernelInfo().heapInfo.pDsh; } size_t Kernel::getSurfaceStateHeapSize(uint32_t rootDeviceIndex) const { - return kernelInfo.usesSsh + return kernelInfos[rootDeviceIndex]->usesSsh ? kernelDeviceInfos[rootDeviceIndex].sshLocalSize : 0; } @@ -877,13 +881,13 @@ void *Kernel::patchBufferOffset(const KernelArgInfo &argInfo, void *svmPtr, Grap cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, GraphicsAllocation *svmAlloc, cl_mem_flags svmFlags) { auto rootDeviceIndex = getDevice().getRootDeviceIndex(); - void *ptrToPatch = patchBufferOffset(kernelInfo.kernelArgInfo[argIndex], svmPtr, svmAlloc, rootDeviceIndex); + void *ptrToPatch = patchBufferOffset(getDefaultKernelInfo().kernelArgInfo[argIndex], svmPtr, svmAlloc, rootDeviceIndex); setArgImmediate(argIndex, sizeof(void *), &svmPtr); storeKernelArg(argIndex, SVM_OBJ, nullptr, svmPtr, sizeof(void *), svmAlloc, svmFlags); if (requiresSshForBuffers()) { - const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; + const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex]; auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap); Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0); } @@ -900,7 +904,7 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio DBG_LOG_INPUTS("setArgBuffer svm_alloc", svmAlloc); auto rootDeviceIndex = getDevice().getRootDeviceIndex(); - const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; + const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex]; storeKernelArg(argIndex, SVM_ALLOC_OBJ, svmAlloc, svmPtr, sizeof(uintptr_t)); @@ -914,7 +918,7 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio patchWithRequiredSize(patchLocation, patchSize, reinterpret_cast(svmPtr)); if (requiresSshForBuffers()) { - const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; + const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex]; auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap); size_t allocSize = 0; size_t offset = 0; @@ -1036,14 +1040,15 @@ void Kernel::getSuggestedLocalWorkSize(const cl_uint workDim, const size_t *glob } uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize, const CommandQueue *commandQueue) const { - auto &hardwareInfo = getHardwareInfo(commandQueue->getDevice().getRootDeviceIndex()); + auto rootDeviceIndex = commandQueue->getDevice().getRootDeviceIndex(); + auto &hardwareInfo = getHardwareInfo(rootDeviceIndex); auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (!hwHelper.isCooperativeDispatchSupported(commandQueue->getGpgpuEngine().getEngineType(), hardwareInfo.platform.eProductFamily)) { return 0; } - auto executionEnvironment = kernelInfo.patchInfo.executionEnvironment; + auto executionEnvironment = getDefaultKernelInfo().patchInfo.executionEnvironment; auto dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount; if (dssCount == 0) { dssCount = hardwareInfo.gtSystemInfo.SubSliceCount; @@ -1054,7 +1059,7 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local hardwareInfo.gtSystemInfo.EUCount, hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount); auto hasBarriers = ((executionEnvironment != nullptr) ? executionEnvironment->HasBarriers : 0u); - return KernelHelper::getMaxWorkGroupCount(kernelInfo.getMaxSimdSize(), + return KernelHelper::getMaxWorkGroupCount(kernelInfos[rootDeviceIndex]->getMaxSimdSize(), availableThreadCount, dssCount, dssCount * KB * hardwareInfo.capabilityTable.slmSize, @@ -1066,7 +1071,7 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local } inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceiver) { - auto numArgs = kernelInfo.kernelArgInfo.size(); + auto numArgs = kernelInfos[commandStreamReceiver.getRootDeviceIndex()]->kernelArgInfo.size(); for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].object) { if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) { @@ -1129,7 +1134,7 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) { } makeArgsResident(commandStreamReceiver); - auto kernelIsaAllocation = this->kernelInfo.kernelAllocation; + auto kernelIsaAllocation = this->kernelInfos[rootDeviceIndex]->kernelAllocation; if (kernelIsaAllocation) { commandStreamReceiver.makeResident(*kernelIsaAllocation); } @@ -1169,7 +1174,7 @@ void Kernel::getResidency(std::vector &dst, uint32_t rootDeviceIndex) dst.push_back(surface); } - auto numArgs = kernelInfo.kernelArgInfo.size(); + auto numArgs = kernelInfos[rootDeviceIndex]->kernelArgInfo.size(); for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].object) { if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) { @@ -1184,7 +1189,7 @@ void Kernel::getResidency(std::vector &dst, uint32_t rootDeviceIndex) } } - auto kernelIsaAllocation = this->kernelInfo.kernelAllocation; + auto kernelIsaAllocation = this->kernelInfos[rootDeviceIndex]->kernelAllocation; if (kernelIsaAllocation) { GeneralSurface *surface = new GeneralSurface(kernelIsaAllocation); dst.push_back(surface); @@ -1194,7 +1199,7 @@ void Kernel::getResidency(std::vector &dst, uint32_t rootDeviceIndex) } bool Kernel::requiresCoherency() { - auto numArgs = kernelInfo.kernelArgInfo.size(); + auto numArgs = getDefaultKernelInfo().kernelArgInfo.size(); for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { if (kernelArguments[argIndex].object) { if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) { @@ -1228,7 +1233,7 @@ cl_int Kernel::setArgLocal(uint32_t argIndex, // Extract our current slmOffset auto slmOffset = *ptrOffset(crossThreadData, - kernelInfo.kernelArgInfo[argIndex].kernelArgPatchInfoVector[0].crossthreadOffset); + getDefaultKernelInfo().kernelArgInfo[argIndex].kernelArgPatchInfoVector[0].crossthreadOffset); // Add our size slmOffset += static_cast(argSize); @@ -1236,7 +1241,7 @@ cl_int Kernel::setArgLocal(uint32_t argIndex, // Update all slm offsets after this argIndex ++argIndex; while (argIndex < slmSizes.size()) { - const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; + const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex]; auto slmAlignment = kernelArgInfo.slmAlignment; // If an local argument, alignment should be non-zero @@ -1254,7 +1259,7 @@ cl_int Kernel::setArgLocal(uint32_t argIndex, ++argIndex; } - slmTotalSize = kernelInfo.workloadInfo.slmStaticSize + alignUp(slmOffset, KB); + slmTotalSize = getDefaultKernelInfo().workloadInfo.slmStaticSize + alignUp(slmOffset, KB); return CL_SUCCESS; } @@ -1266,7 +1271,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, if (argSize != sizeof(cl_mem *)) return CL_INVALID_ARG_SIZE; - const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; + const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex]; auto clMem = reinterpret_cast(argVal); auto rootDeviceIndex = getDevice().getRootDeviceIndex(); patchBufferOffset(kernelArgInfo, nullptr, nullptr, rootDeviceIndex); @@ -1359,7 +1364,7 @@ cl_int Kernel::setArgPipe(uint32_t argIndex, return CL_INVALID_ARG_SIZE; } - const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; + const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex]; auto clMem = reinterpret_cast(argVal); if (clMem && *clMem) { @@ -1415,7 +1420,7 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex, const void *argVal, uint32_t mipLevel) { auto retVal = CL_INVALID_ARG_VALUE; auto rootDeviceIndex = getDevice().getRootDeviceIndex(); - patchBufferOffset(kernelInfo.kernelArgInfo[argIndex], nullptr, nullptr, rootDeviceIndex); + patchBufferOffset(getDefaultKernelInfo().kernelArgInfo[argIndex], nullptr, nullptr, rootDeviceIndex); auto clMemObj = *(static_cast(argVal)); auto pImage = castToObject(clMemObj); @@ -1424,7 +1429,7 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex, if (pImage->peekSharingHandler()) { usingSharedObjArgs = true; } - const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; + const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex]; DBG_LOG_INPUTS("setArgImage cl_mem", clMemObj); @@ -1435,7 +1440,7 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex, // Sets SS structure if (kernelArgInfo.isMediaImage) { - DEBUG_BREAK_IF(!kernelInfo.isVmeWorkload); + DEBUG_BREAK_IF(!getDefaultKernelInfo().isVmeWorkload); pImage->setMediaImageArg(surfaceState, rootDeviceIndex); } else { pImage->setImageArg(surfaceState, kernelArgInfo.isMediaBlockImage, mipLevel, rootDeviceIndex); @@ -1481,7 +1486,7 @@ cl_int Kernel::setArgImmediate(uint32_t argIndex, auto rootDeviceIndex = getDevice().getRootDeviceIndex(); if (argVal) { - const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; + const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex]; DEBUG_BREAK_IF(kernelArgInfo.kernelArgPatchInfoVector.size() <= 0); storeKernelArg(argIndex, NONE_OBJ, nullptr, nullptr, argSize); @@ -1534,7 +1539,7 @@ cl_int Kernel::setArgSampler(uint32_t argIndex, } if (pSampler && argSize == sizeof(cl_sampler *)) { - const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; + const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex]; storeKernelArg(argIndex, SAMPLER_OBJ, clSamplerObj, argVal, argSize); @@ -1577,7 +1582,7 @@ cl_int Kernel::setArgAccelerator(uint32_t argIndex, if (pAccelerator) { storeKernelArg(argIndex, ACCELERATOR_OBJ, clAcceleratorObj, argVal, argSize); - const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex]; + const auto &kernelArgInfo = getDefaultKernelInfo().kernelArgInfo[argIndex]; if (kernelArgInfo.samplerArgumentType == iOpenCL::SAMPLER_OBJECT_VME) { auto crossThreadData = getCrossThreadData(rootDeviceIndex); @@ -1628,7 +1633,7 @@ cl_int Kernel::setArgDevQueue(uint32_t argIndex, storeKernelArg(argIndex, DEVICE_QUEUE_OBJ, clDeviceQueue, argVal, argSize); - const auto &kernelArgPatchInfo = kernelInfo.kernelArgInfo[argIndex].kernelArgPatchInfoVector[0]; + const auto &kernelArgPatchInfo = kernelInfos[rootDeviceIndex]->kernelArgInfo[argIndex].kernelArgPatchInfoVector[0]; auto patchLocation = ptrOffset(reinterpret_cast(getCrossThreadData(rootDeviceIndex)), kernelArgPatchInfo.crossthreadOffset); @@ -1659,6 +1664,7 @@ void Kernel::unsetArg(uint32_t argIndex) { void Kernel::createReflectionSurface() { auto pClDevice = program->getDevices()[0]; + auto rootDeviceIndex = pClDevice->getRootDeviceIndex(); if (this->isParentKernel && kernelReflectionSurface == nullptr) { auto &hwInfo = pClDevice->getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); @@ -1678,7 +1684,7 @@ void Kernel::createReflectionSurface() { size_t kernelReflectionSize = alignUp(sizeof(IGIL_KernelDataHeader) + blockCount * sizeof(IGIL_KernelAddressData), sizeof(void *)); uint32_t kernelDataOffset = static_cast(kernelReflectionSize); - uint32_t parentSSHAlignedSize = alignUp(this->kernelInfo.heapInfo.SurfaceStateHeapSize, hwHelper.getBindingTableStateAlignement()); + uint32_t parentSSHAlignedSize = alignUp(this->kernelInfos[rootDeviceIndex]->heapInfo.SurfaceStateHeapSize, hwHelper.getBindingTableStateAlignement()); uint32_t btOffset = parentSSHAlignedSize; for (uint32_t i = 0; i < blockCount; i++) { @@ -2225,22 +2231,25 @@ void Kernel::resetSharedObjectsPatchAddresses() { void Kernel::provideInitializationHints() { - const auto &patchInfo = kernelInfo.patchInfo; Context *context = program->getContextPtr(); if (context == nullptr || !context->isProvidingPerformanceHints()) return; - for (const auto &kernelDeviceInfo : kernelDeviceInfos) { - if (kernelDeviceInfo.privateSurfaceSize) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, PRIVATE_MEMORY_USAGE_TOO_HIGH, - kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), kernelDeviceInfo.privateSurfaceSize); + for (auto i = 0u; i < kernelDeviceInfos.size(); i++) { + if (!kernelInfos[i]) { + continue; } - } - if (patchInfo.mediavfestate) { - auto scratchSize = patchInfo.mediavfestate->PerThreadScratchSpace; - scratchSize *= getDevice().getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo().getMaxSimdSize(); - if (scratchSize > 0) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, REGISTER_PRESSURE_TOO_HIGH, - kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize); + if (kernelDeviceInfos[i].privateSurfaceSize) { + context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, PRIVATE_MEMORY_USAGE_TOO_HIGH, + kernelInfos[i]->kernelDescriptor.kernelMetadata.kernelName.c_str(), kernelDeviceInfos[i].privateSurfaceSize); + } + const auto &patchInfo = kernelInfos[i]->patchInfo; + if (patchInfo.mediavfestate) { + auto scratchSize = patchInfo.mediavfestate->PerThreadScratchSpace; + scratchSize *= getDevice().getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo().getMaxSimdSize(); + if (scratchSize > 0) { + context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, REGISTER_PRESSURE_TOO_HIGH, + kernelInfos[i]->kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize); + } } } } @@ -2248,7 +2257,7 @@ void Kernel::provideInitializationHints() { void Kernel::patchDefaultDeviceQueue(DeviceQueue *devQueue) { auto rootDeviceIndex = devQueue->getDevice().getRootDeviceIndex(); - const auto &patchInfo = kernelInfo.patchInfo; + const auto &patchInfo = kernelInfos[rootDeviceIndex]->patchInfo; if (patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) { if (kernelDeviceInfos[rootDeviceIndex].crossThreadData) { auto patchLocation = ptrOffset(reinterpret_cast(getCrossThreadData(rootDeviceIndex)), @@ -2269,7 +2278,7 @@ void Kernel::patchDefaultDeviceQueue(DeviceQueue *devQueue) { void Kernel::patchEventPool(DeviceQueue *devQueue) { auto rootDeviceIndex = devQueue->getDevice().getRootDeviceIndex(); - const auto &patchInfo = kernelInfo.patchInfo; + const auto &patchInfo = kernelInfos[rootDeviceIndex]->patchInfo; if (patchInfo.pAllocateStatelessEventPoolSurface) { if (kernelDeviceInfos[rootDeviceIndex].crossThreadData) { auto patchLocation = ptrOffset(reinterpret_cast(getCrossThreadData(rootDeviceIndex)), @@ -2291,7 +2300,7 @@ void Kernel::patchEventPool(DeviceQueue *devQueue) { void Kernel::patchBlocksSimdSize(uint32_t rootDeviceIndex) { BlockKernelManager *blockManager = program->getBlockKernelManager(); - for (auto &idOffset : kernelInfo.childrenKernelsIdOffset) { + for (auto &idOffset : kernelInfos[rootDeviceIndex]->childrenKernelsIdOffset) { DEBUG_BREAK_IF(!(idOffset.first < static_cast(blockManager->getCount()))); @@ -2302,12 +2311,12 @@ void Kernel::patchBlocksSimdSize(uint32_t rootDeviceIndex) { } bool Kernel::usesSyncBuffer() { - return (kernelInfo.patchInfo.pAllocateSyncBuffer != nullptr); + return (getDefaultKernelInfo().patchInfo.pAllocateSyncBuffer != nullptr); } void Kernel::patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation, size_t bufferOffset) { auto rootDeviceIndex = device.getRootDeviceIndex(); - auto &patchInfo = kernelInfo.patchInfo; + auto &patchInfo = kernelInfos[rootDeviceIndex]->patchInfo; auto bufferPatchAddress = ptrOffset(getCrossThreadData(rootDeviceIndex), patchInfo.pAllocateSyncBuffer->DataParamOffset); patchWithRequiredSize(bufferPatchAddress, patchInfo.pAllocateSyncBuffer->DataParamSize, ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset)); @@ -2324,7 +2333,7 @@ void Kernel::patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation, template void Kernel::patchReflectionSurface(DeviceQueue *, PrintfHandler *); bool Kernel::isPatched() const { - return patchedArgumentsNum == kernelInfo.argumentsToPatchNum; + return patchedArgumentsNum == getDefaultKernelInfo().argumentsToPatchNum; } cl_int Kernel::checkCorrectImageAccessQualifier(cl_uint argIndex, size_t argSize, @@ -2352,7 +2361,7 @@ void Kernel::resolveArgs() { return; bool canTransformImageTo2dArray = true; for (uint32_t i = 0; i < patchedArgumentsNum; i++) { - if (kernelInfo.kernelArgInfo.at(i).isSampler) { + if (getDefaultKernelInfo().kernelArgInfo.at(i).isSampler) { auto sampler = castToObject(kernelArguments.at(i).object); if (sampler->isTransformable()) { canTransformImageTo2dArray = true; @@ -2364,9 +2373,9 @@ void Kernel::resolveArgs() { } auto rootDeviceIndex = getDevice().getRootDeviceIndex(); if (canTransformImageTo2dArray) { - imageTransformer->transformImagesTo2dArray(kernelInfo, kernelArguments, getSurfaceStateHeap(rootDeviceIndex)); + imageTransformer->transformImagesTo2dArray(getDefaultKernelInfo(), kernelArguments, getSurfaceStateHeap(rootDeviceIndex)); } else if (imageTransformer->didTransform()) { - imageTransformer->transformImagesTo3d(kernelInfo, kernelArguments, getSurfaceStateHeap(rootDeviceIndex)); + imageTransformer->transformImagesTo3d(getDefaultKernelInfo(), kernelArguments, getSurfaceStateHeap(rootDeviceIndex)); } } @@ -2378,7 +2387,7 @@ bool Kernel::canTransformImages() const { void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsForAuxTranslation) { memObjsForAuxTranslation.reserve(getKernelArgsNumber()); for (uint32_t i = 0; i < getKernelArgsNumber(); i++) { - if (BUFFER_OBJ == kernelArguments.at(i).type && !kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess) { + if (BUFFER_OBJ == kernelArguments.at(i).type && !getDefaultKernelInfo().kernelArgInfo.at(i).pureStatefulBufferAccess) { auto buffer = castToObject(getKernelArg(i)); if (buffer && buffer->getMultiGraphicsAllocation().getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) { memObjsForAuxTranslation.insert(buffer); @@ -2386,7 +2395,7 @@ void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsF auto &context = this->program->getContext(); if (context.isProvidingPerformanceHints()) { context.providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, KERNEL_ARGUMENT_AUX_TRANSLATION, - kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), i, kernelInfo.kernelArgInfo.at(i).metadataExtended->argName.c_str()); + getDefaultKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), i, getDefaultKernelInfo().kernelArgInfo.at(i).metadataExtended->argName.c_str()); } } } @@ -2452,10 +2461,10 @@ uint64_t Kernel::getKernelStartOffset( uint64_t kernelStartOffset = 0; - if (kernelInfo.getGraphicsAllocation()) { - kernelStartOffset = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); + if (kernelInfos[rootDeviceIndex]->getGraphicsAllocation()) { + kernelStartOffset = kernelInfos[rootDeviceIndex]->getGraphicsAllocation()->getGpuAddressToPatch(); if (localIdsGenerationByRuntime == false && kernelUsesLocalIds == true) { - kernelStartOffset += kernelInfo.patchInfo.threadPayload->OffsetToSkipPerThreadDataLoad; + kernelStartOffset += kernelInfos[rootDeviceIndex]->patchInfo.threadPayload->OffsetToSkipPerThreadDataLoad; } } @@ -2465,14 +2474,16 @@ uint64_t Kernel::getKernelStartOffset( auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (isCssUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { - kernelStartOffset += kernelInfo.patchInfo.threadPayload->OffsetToSkipSetFFIDGP; + kernelStartOffset += kernelInfos[rootDeviceIndex]->patchInfo.threadPayload->OffsetToSkipSetFFIDGP; } return kernelStartOffset; } void Kernel::patchBindlessSurfaceStateOffsets(const Device &device, const size_t sshOffset) { - const bool bindlessUsed = this->kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode == KernelDescriptor::AddressingMode::BindlessAndStateless; + auto rootDeviceIndex = device.getRootDeviceIndex(); + auto &kernelInfo = *kernelInfos[rootDeviceIndex]; + const bool bindlessUsed = kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode == KernelDescriptor::AddressingMode::BindlessAndStateless; if (bindlessUsed) { auto &hardwareInfo = device.getHardwareInfo(); @@ -2525,4 +2536,16 @@ bool Kernel::requiresWaDisableRccRhwoOptimization(uint32_t rootDeviceIndex) cons const HardwareInfo &Kernel::getHardwareInfo(uint32_t rootDeviceIndex) const { return *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); } + +const KernelInfo &Kernel::getDefaultKernelInfo() const { + const KernelInfo *pKernelInfo = nullptr; + for (auto &kernelInfo : kernelInfos) { + if (kernelInfo) { + pKernelInfo = kernelInfo; + break; + } + } + UNRECOVERABLE_IF(!pKernelInfo); + return *pKernelInfo; +} } // namespace NEO diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 0cb78bd9a4..70887448ce 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -43,6 +43,8 @@ struct OpenCLObjectMapper<_cl_kernel> { typedef class Kernel DerivedType; }; +using KernelInfoContainer = StackVec; + class Kernel : public BaseObject<_cl_kernel> { public: static const cl_ulong objectMagic = 0x3284ADC8EA0AFE25LL; @@ -77,11 +79,11 @@ class Kernel : public BaseObject<_cl_kernel> { const void *argVal); template - static kernel_t *create(program_t *program, const KernelInfo &kernelInfo, cl_int *errcodeRet) { + static kernel_t *create(program_t *program, const KernelInfoContainer &kernelInfos, cl_int *errcodeRet) { cl_int retVal; kernel_t *pKernel = nullptr; - pKernel = new kernel_t(program, kernelInfo); + pKernel = new kernel_t(program, kernelInfos); retVal = pKernel->initialize(); if (retVal != CL_SUCCESS) { @@ -96,7 +98,7 @@ class Kernel : public BaseObject<_cl_kernel> { if (FileLoggerInstance().enabled()) { std::string source; program->getSource(source); - FileLoggerInstance().dumpKernel(kernelInfo.kernelDescriptor.kernelMetadata.kernelName, source); + FileLoggerInstance().dumpKernel(kernelInfos[program->getDevices()[0]->getRootDeviceIndex()]->kernelDescriptor.kernelMetadata.kernelName, source); } return pKernel; @@ -178,15 +180,18 @@ class Kernel : public BaseObject<_cl_kernel> { } size_t getKernelArgsNumber() const { - return kernelInfo.kernelArgInfo.size(); + return getDefaultKernelInfo().kernelArgInfo.size(); } bool requiresSshForBuffers() const { - return kernelInfo.requiresSshForBuffers; + return getDefaultKernelInfo().requiresSshForBuffers; } const KernelInfo &getKernelInfo() const { - return kernelInfo; + return getDefaultKernelInfo(); + } + const KernelInfoContainer &getKernelInfos() const { + return kernelInfos; } Context &getContext() const { @@ -196,11 +201,11 @@ class Kernel : public BaseObject<_cl_kernel> { Program *getProgram() const { return program; } uint32_t getScratchSize() { - return kernelInfo.patchInfo.mediavfestate ? kernelInfo.patchInfo.mediavfestate->PerThreadScratchSpace : 0; + return getDefaultKernelInfo().patchInfo.mediavfestate ? getDefaultKernelInfo().patchInfo.mediavfestate->PerThreadScratchSpace : 0; } uint32_t getPrivateScratchSize() { - return kernelInfo.patchInfo.mediaVfeStateSlot1 ? kernelInfo.patchInfo.mediaVfeStateSlot1->PerThreadScratchSpace : 0; + return getDefaultKernelInfo().patchInfo.mediaVfeStateSlot1 ? getDefaultKernelInfo().patchInfo.mediaVfeStateSlot1->PerThreadScratchSpace : 0; } void createReflectionSurface(); @@ -278,7 +283,7 @@ class Kernel : public BaseObject<_cl_kernel> { const SimpleKernelArgInfo &getKernelArgInfo(uint32_t argIndex) const; bool getAllowNonUniform() const { return program->getAllowNonUniform(); } - bool isVmeKernel() const { return kernelInfo.isVmeWorkload; } + bool isVmeKernel() const { return getDefaultKernelInfo().isVmeWorkload; } bool requiresSpecialPipelineSelectMode() const { return specialPipelineSelectMode; } MOCKABLE_VIRTUAL bool isSingleSubdevicePreferred() const { return false; } @@ -348,25 +353,25 @@ class Kernel : public BaseObject<_cl_kernel> { return executionType; } bool isUsingSyncBuffer() const { - return (kernelInfo.patchInfo.pAllocateSyncBuffer != nullptr); + return (getDefaultKernelInfo().patchInfo.pAllocateSyncBuffer != nullptr); } bool checkIfIsParentKernelAndBlocksUsesPrintf(); bool is32Bit() const { - return kernelInfo.gpuPointerSize == 4; + return getDefaultKernelInfo().gpuPointerSize == 4; } int32_t getDebugSurfaceBti() const { - if (kernelInfo.patchInfo.pAllocateSystemThreadSurface) { - return kernelInfo.patchInfo.pAllocateSystemThreadSurface->BTI; + if (getDefaultKernelInfo().patchInfo.pAllocateSystemThreadSurface) { + return getDefaultKernelInfo().patchInfo.pAllocateSystemThreadSurface->BTI; } return -1; } size_t getPerThreadSystemThreadSurfaceSize() const { - if (kernelInfo.patchInfo.pAllocateSystemThreadSurface) { - return kernelInfo.patchInfo.pAllocateSystemThreadSurface->PerThreadSystemThreadSurfaceSize; + if (getDefaultKernelInfo().patchInfo.pAllocateSystemThreadSurface) { + return getDefaultKernelInfo().patchInfo.pAllocateSystemThreadSurface->PerThreadSystemThreadSurfaceSize; } return 0; } @@ -421,6 +426,7 @@ class Kernel : public BaseObject<_cl_kernel> { } protected: + const KernelInfo &getDefaultKernelInfo() const; struct ObjectCounts { uint32_t imageCount; uint32_t samplerCount; @@ -498,7 +504,7 @@ class Kernel : public BaseObject<_cl_kernel> { void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const PatchTokenT &patch); void getParentObjectCounts(ObjectCounts &objectCount); - Kernel(Program *programArg, const KernelInfo &kernelInfoArg, bool schedulerKernel = false); + Kernel(Program *programArg, const KernelInfoContainer &kernelInfsoArg, bool schedulerKernel = false); void provideInitializationHints(); void patchBlocksCurbeWithConstantValues(); @@ -519,7 +525,7 @@ class Kernel : public BaseObject<_cl_kernel> { const ExecutionEnvironment &executionEnvironment; Program *program; const ClDeviceVector &deviceVector; - const KernelInfo &kernelInfo; + const KernelInfoContainer kernelInfos; std::vector kernelArguments; std::vector kernelArgHandlers; diff --git a/opencl/source/kernel/kernel.inl b/opencl/source/kernel/kernel.inl index c5265aa90f..84e4a69f87 100644 --- a/opencl/source/kernel/kernel.inl +++ b/opencl/source/kernel/kernel.inl @@ -18,6 +18,8 @@ void Kernel::patchReflectionSurface(DeviceQueue *devQueue, PrintfHandler *printf BlockKernelManager *blockManager = program->getBlockKernelManager(); uint32_t blockCount = static_cast(blockManager->getCount()); + auto rootDeviceIndex = devQueue->getDevice().getRootDeviceIndex(); + auto &kernelInfo = *kernelInfos[rootDeviceIndex]; for (uint32_t i = 0; i < blockCount; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); @@ -79,7 +81,7 @@ void Kernel::patchReflectionSurface(DeviceQueue *devQueue, PrintfHandler *printf privateSurfaceOffset, privateSurfacePatchSize, privateSurfaceGpuAddress); } - ReflectionSurfaceHelper::setParentImageParams(reflectionSurface, this->kernelArguments, this->kernelInfo); - ReflectionSurfaceHelper::setParentSamplerParams(reflectionSurface, this->kernelArguments, this->kernelInfo); + ReflectionSurfaceHelper::setParentImageParams(reflectionSurface, this->kernelArguments, kernelInfo); + ReflectionSurfaceHelper::setParentSamplerParams(reflectionSurface, this->kernelArguments, kernelInfo); } } // namespace NEO diff --git a/opencl/source/scheduler/scheduler_kernel.h b/opencl/source/scheduler/scheduler_kernel.h index e5a373cb3e..1b7ecc8538 100644 --- a/opencl/source/scheduler/scheduler_kernel.h +++ b/opencl/source/scheduler/scheduler_kernel.h @@ -34,8 +34,8 @@ class SchedulerKernel : public Kernel { } size_t getCurbeSize() { - size_t crossTrheadDataSize = kernelInfo.patchInfo.dataParameterStream ? kernelInfo.patchInfo.dataParameterStream->DataParameterStreamSize : 0; - size_t dshSize = kernelInfo.heapInfo.DynamicStateHeapSize; + size_t crossTrheadDataSize = getDefaultKernelInfo().patchInfo.dataParameterStream ? getDefaultKernelInfo().patchInfo.dataParameterStream->DataParameterStreamSize : 0; + size_t dshSize = getDefaultKernelInfo().heapInfo.DynamicStateHeapSize; crossTrheadDataSize = alignUp(crossTrheadDataSize, 64); dshSize = alignUp(dshSize, 64); @@ -55,7 +55,7 @@ class SchedulerKernel : public Kernel { static BuiltinCode loadSchedulerKernel(Device *device); protected: - SchedulerKernel(Program *programArg, const KernelInfo &kernelInfoArg) : Kernel(programArg, kernelInfoArg, true) { + SchedulerKernel(Program *programArg, const KernelInfoContainer &kernelInfosArg) : Kernel(programArg, kernelInfosArg, true) { computeGws(); }; diff --git a/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp b/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp index e3f934e55a..806168e0ab 100644 --- a/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp +++ b/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp @@ -47,7 +47,7 @@ class MediaImageSetArgTest : public ClDeviceFixture, pKernelInfo->kernelArgInfo[1].isImage = true; pKernelInfo->kernelArgInfo[0].isImage = true; - pKernel = new MockKernel(program.get(), *pKernelInfo); + pKernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); diff --git a/opencl/test/unit_test/api/cl_api_tests.cpp b/opencl/test/unit_test/api/cl_api_tests.cpp index ffac800901..0b37be0de8 100644 --- a/opencl/test/unit_test/api/cl_api_tests.cpp +++ b/opencl/test/unit_test/api/cl_api_tests.cpp @@ -35,7 +35,7 @@ void api_fixture_using_aligned_memory_manager::SetUp() { program = new MockProgram(ctxPtr, false, toClDeviceVector(*device)); Program *prgPtr = reinterpret_cast(program); - kernel = new MockKernel(prgPtr, program->mockKernelInfo); + kernel = new MockKernel(prgPtr, MockKernel::toKernelInfoContainer(program->mockKernelInfo, 0)); ASSERT_NE(nullptr, kernel); } diff --git a/opencl/test/unit_test/api/cl_api_tests.h b/opencl/test/unit_test/api/cl_api_tests.h index 6bb545699c..60e20e2ef3 100644 --- a/opencl/test/unit_test/api/cl_api_tests.h +++ b/opencl/test/unit_test/api/cl_api_tests.h @@ -49,7 +49,7 @@ struct ApiFixture { pProgram = new MockProgram(pContext, false, toClDeviceVector(*pDevice)); - pKernel = new MockKernel(pProgram, pProgram->mockKernelInfo); + pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(pProgram->mockKernelInfo, testedRootDeviceIndex)); ASSERT_NE(nullptr, pKernel); } diff --git a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl index 7a9257a220..b8f0b9b5ea 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl @@ -199,7 +199,7 @@ TEST_F(KernelSubGroupInfoKhrTest, GivenNullDeviceWhenGettingSubGroupInfoFromMult MockUnrestrictiveContext context; auto mockProgram = std::make_unique(&context, false, context.getDevices()); - auto mockKernel = std::make_unique(mockProgram.get(), pKernel->getKernelInfo()); + auto mockKernel = std::make_unique(mockProgram.get(), pKernel->getKernelInfos()); retVal = clGetKernelSubGroupInfoKHR( mockKernel.get(), diff --git a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl index 988e225285..5050ef7ffe 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl @@ -348,7 +348,7 @@ TEST_F(KernelSubGroupInfoTest, GivenNullDeviceWhenGettingSubGroupInfoFromMultiDe MockUnrestrictiveContext context; auto mockProgram = std::make_unique(&context, false, context.getDevices()); - auto mockKernel = std::make_unique(mockProgram.get(), pKernel->getKernelInfo()); + auto mockKernel = std::make_unique(mockProgram.get(), pKernel->getKernelInfos()); retVal = clGetKernelSubGroupInfo( mockKernel.get(), diff --git a/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl index 342eb9fa37..eea6181894 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl @@ -134,7 +134,7 @@ TEST_F(clGetKernelWorkGroupInfoTest, GivenNullDeviceWhenGettingWorkGroupInfoFrom size_t paramValueSizeRet; MockUnrestrictiveContext context; auto mockProgram = std::make_unique(&context, false, context.getDevices()); - auto mockKernel = std::make_unique(mockProgram.get(), pKernel->getKernelInfo()); + auto mockKernel = std::make_unique(mockProgram.get(), MockKernel::toKernelInfoContainer(pKernel->getKernelInfo(), context.getDevice(0)->getRootDeviceIndex())); retVal = clGetKernelWorkGroupInfo( mockKernel.get(), diff --git a/opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl b/opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl index 85fff6c273..fe601ac485 100644 --- a/opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl +++ b/opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl @@ -39,7 +39,7 @@ class KernelArgSvmFixture : public ApiFixture<> { pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = (uint32_t)sizeof(void *); pKernelInfo->kernelArgInfo[0].metadata.addressQualifier = KernelArgMetadata::AddrGlobal; - pMockKernel = new MockKernel(pProgram, *pKernelInfo); + pMockKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, testedRootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pMockKernel->initialize()); pMockKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); } @@ -85,7 +85,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenDeviceNotSupportingSvmWhenSettingKern auto hwInfo = executionEnvironment->rootDeviceEnvironments[ApiFixture::testedRootDeviceIndex]->getMutableHardwareInfo(); hwInfo->capabilityTable.ftrSvm = false; - auto pMockKernel = std::make_unique(pProgram, *pKernelInfo); + auto pMockKernel = std::make_unique(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, testedRootDeviceIndex)); auto retVal = clSetKernelArgSVMPointer( pMockKernel.get(), // cl_kernel kernel (cl_uint)-1, // cl_uint arg_index diff --git a/opencl/test/unit_test/api/cl_set_kernel_exec_info_tests.inl b/opencl/test/unit_test/api/cl_set_kernel_exec_info_tests.inl index ff7f7ed2c2..49a6043770 100644 --- a/opencl/test/unit_test/api/cl_set_kernel_exec_info_tests.inl +++ b/opencl/test/unit_test/api/cl_set_kernel_exec_info_tests.inl @@ -21,7 +21,7 @@ class KernelExecInfoFixture : public ApiFixture<> { pKernelInfo = std::make_unique(); - pMockKernel = new MockKernel(pProgram, *pKernelInfo); + pMockKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, testedRootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pMockKernel->initialize()); svmCapabilities = pDevice->getDeviceInfo().svmCapabilities; if (svmCapabilities != 0) { @@ -67,7 +67,7 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenDeviceNotSupportingSvmWhenSettingKern auto hwInfo = executionEnvironment->rootDeviceEnvironments[ApiFixture::testedRootDeviceIndex]->getMutableHardwareInfo(); hwInfo->capabilityTable.ftrSvm = false; - auto pMockKernel = std::make_unique(pProgram, *pKernelInfo); + auto pMockKernel = std::make_unique(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, testedRootDeviceIndex)); auto retVal = clSetKernelExecInfo( pMockKernel.get(), // cl_kernel kernel CL_KERNEL_EXEC_INFO_SVM_PTRS, // cl_kernel_exec_info param_name diff --git a/opencl/test/unit_test/aub_tests/gen9/skl/command_queue/run_kernel_aub_tests_skl.cpp b/opencl/test/unit_test/aub_tests/gen9/skl/command_queue/run_kernel_aub_tests_skl.cpp index e75cad4aa7..eb2c912a8b 100644 --- a/opencl/test/unit_test/aub_tests/gen9/skl/command_queue/run_kernel_aub_tests_skl.cpp +++ b/opencl/test/unit_test/aub_tests/gen9/skl/command_queue/run_kernel_aub_tests_skl.cpp @@ -53,7 +53,7 @@ SKLTEST_F(AUBRunKernelIntegrateTest, ooqExecution) { Kernel *pKernel0 = Kernel::create( pProgram, - *pKernelInfo0, + MockKernel::toKernelInfoContainer(*pKernelInfo0, rootDeviceIndex), &retVal); ASSERT_NE(nullptr, pKernel0); @@ -62,7 +62,7 @@ SKLTEST_F(AUBRunKernelIntegrateTest, ooqExecution) { Kernel *pKernel1 = Kernel::create( pProgram, - *pKernelInfo1, + MockKernel::toKernelInfoContainer(*pKernelInfo1, rootDeviceIndex), &retVal); ASSERT_NE(nullptr, pKernel1); @@ -71,7 +71,7 @@ SKLTEST_F(AUBRunKernelIntegrateTest, ooqExecution) { Kernel *pKernel2 = Kernel::create( pProgram, - *pKernelInfo2, + MockKernel::toKernelInfoContainer(*pKernelInfo2, rootDeviceIndex), &retVal); ASSERT_NE(nullptr, pKernel2); @@ -276,7 +276,7 @@ SKLTEST_F(AUBRunKernelIntegrateTest, deviceSideVme) { Kernel *pKernel = Kernel::create( pProgram, - *pKernelInfo, + MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), &retVal); ASSERT_NE(pKernel, nullptr); diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp index 081fc7481c..d1ddbe9d17 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp @@ -147,7 +147,7 @@ HWTEST_F(DispatchWalkerTest, givenSimd1WhenSetGpgpuWalkerThreadDataThenSimdInWal } HWTEST_F(DispatchWalkerTest, WhenDispatchingWalkerThenCommandStreamMemoryIsntChanged) { - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); auto &commandStream = pCmdQ->getCS(4096); @@ -194,7 +194,7 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalIdsWhenDispatchingWalkerThenWalkerIsDis threadPayload.LocalIDZPresent = 0; threadPayload.UnusedPerThreadConstantPresent = 1; - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); auto &commandStream = pCmdQ->getCS(4096); @@ -236,7 +236,7 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalIdsWhenDispatchingWalkerThenWalkerIsDis } HWTEST_F(DispatchWalkerTest, GivenDefaultLwsAlgorithmWhenDispatchingWalkerThenDimensionsAreCorrect) { - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); kernelInfo.workloadInfo.workDimOffset = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -267,7 +267,7 @@ HWTEST_F(DispatchWalkerTest, GivenSquaredLwsAlgorithmWhenDispatchingWalkerThenDi DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(false); DebugManager.flags.EnableComputeWorkSizeSquared.set(true); - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); kernelInfo.workloadInfo.workDimOffset = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -295,7 +295,7 @@ HWTEST_F(DispatchWalkerTest, GivenSquaredLwsAlgorithmWhenDispatchingWalkerThenDi HWTEST_F(DispatchWalkerTest, GivenNdLwsAlgorithmWhenDispatchingWalkerThenDimensionsAreCorrect) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(true); - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); kernelInfo.workloadInfo.workDimOffset = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -324,7 +324,7 @@ HWTEST_F(DispatchWalkerTest, GivenOldLwsAlgorithmWhenDispatchingWalkerThenDimens DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(false); DebugManager.flags.EnableComputeWorkSizeSquared.set(false); - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); kernelInfo.workloadInfo.workDimOffset = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -350,7 +350,7 @@ HWTEST_F(DispatchWalkerTest, GivenOldLwsAlgorithmWhenDispatchingWalkerThenDimens } HWTEST_F(DispatchWalkerTest, GivenNumWorkGroupsWhenDispatchingWalkerThenNumWorkGroupsIsCorrectlySet) { - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); kernelInfo.workloadInfo.numWorkGroupsOffset[0] = 0; kernelInfo.workloadInfo.numWorkGroupsOffset[1] = 4; kernelInfo.workloadInfo.numWorkGroupsOffset[2] = 8; @@ -383,7 +383,7 @@ HWTEST_F(DispatchWalkerTest, GivenNumWorkGroupsWhenDispatchingWalkerThenNumWorkG HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndDefaultAlgorithmWhenDispatchingWalkerThenLwsIsCorrect) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(false); - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; @@ -413,7 +413,7 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndDefaultAlgorithmWhenDispatch HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndNdOnWhenDispatchingWalkerThenLwsIsCorrect) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeND.set(true); - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; @@ -444,7 +444,7 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmWhenDispatch DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); DebugManager.flags.EnableComputeWorkSizeND.set(false); - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; @@ -475,7 +475,7 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmOffAndNdOffW DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); DebugManager.flags.EnableComputeWorkSizeND.set(false); - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; @@ -503,7 +503,7 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeAndSquaredAlgorithmOffAndNdOffW } HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeWhenDispatchingWalkerThenLwsIsCorrect) { - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; @@ -532,7 +532,7 @@ HWTEST_F(DispatchWalkerTest, GivenNoLocalWorkSizeWhenDispatchingWalkerThenLwsIsC } HWTEST_F(DispatchWalkerTest, GivenTwoSetsOfLwsOffsetsWhenDispatchingWalkerThenLwsIsCorrect) { - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; @@ -567,13 +567,13 @@ HWTEST_F(DispatchWalkerTest, GivenTwoSetsOfLwsOffsetsWhenDispatchingWalkerThenLw } HWTEST_F(DispatchWalkerTest, GivenSplitKernelWhenDispatchingWalkerThenLwsIsCorrect) { - MockKernel kernel1(program.get(), kernelInfo); + MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - MockKernel kernel2(program.get(), kernelInfoWithSampler); + MockKernel kernel2(program.get(), MockKernel::toKernelInfoContainer(kernelInfoWithSampler, rootDeviceIndex)); kernelInfoWithSampler.workloadInfo.localWorkSizeOffsets[0] = 12; kernelInfoWithSampler.workloadInfo.localWorkSizeOffsets[1] = 16; kernelInfoWithSampler.workloadInfo.localWorkSizeOffsets[2] = 20; @@ -613,8 +613,8 @@ HWTEST_F(DispatchWalkerTest, GivenSplitKernelWhenDispatchingWalkerThenLwsIsCorre } HWTEST_F(DispatchWalkerTest, GivenSplitWalkerWhenDispatchingWalkerThenLwsIsCorrect) { - MockKernel kernel1(program.get(), kernelInfo); - MockKernel mainKernel(program.get(), kernelInfo); + MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); + MockKernel mainKernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); kernelInfo.workloadInfo.localWorkSizeOffsets[0] = 0; kernelInfo.workloadInfo.localWorkSizeOffsets[1] = 4; kernelInfo.workloadInfo.localWorkSizeOffsets[2] = 8; @@ -672,7 +672,7 @@ HWTEST_F(DispatchWalkerTest, GivenSplitWalkerWhenDispatchingWalkerThenLwsIsCorre } HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenCommandSteamIsNotConsumed) { - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -706,7 +706,7 @@ HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenCommandSt } HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenRequiredHeaSizesAreTakenFromKernel) { - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -764,7 +764,7 @@ HWTEST_F(DispatchWalkerTest, givenBlockedEnqueueWhenObtainingCommandStreamThenAl } HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenRequiredHeapSizesAreTakenFromMdi) { - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, &kernel); @@ -792,7 +792,7 @@ HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenRequiredH } HWTEST_F(DispatchWalkerTest, givenBlockedQueueWhenDispatchWalkerIsCalledThenCommandStreamHasGpuAddress) { - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, &kernel); @@ -813,7 +813,7 @@ HWTEST_F(DispatchWalkerTest, givenBlockedQueueWhenDispatchWalkerIsCalledThenComm } HWTEST_F(DispatchWalkerTest, givenThereAreAllocationsForReuseWhenDispatchWalkerIsCalledThenCommandStreamObtainsReusableAllocation) { - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, &kernel); @@ -840,9 +840,9 @@ HWTEST_F(DispatchWalkerTest, givenThereAreAllocationsForReuseWhenDispatchWalkerI } HWTEST_F(DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenWorkDimensionsAreCorrect) { - MockKernel kernel1(program.get(), kernelInfo); + MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - MockKernel kernel2(program.get(), kernelInfo); + MockKernel kernel2(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({&kernel1, &kernel2})); @@ -875,9 +875,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatch auto gpuAddress1 = kernelIsaAllocation->getGpuAddressToPatch(); auto gpuAddress2 = kernelIsaWithSamplerAllocation->getGpuAddressToPatch(); - MockKernel kernel1(program.get(), kernelInfo); + MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - MockKernel kernel2(program.get(), kernelInfoWithSampler); + MockKernel kernel2(program.get(), MockKernel::toKernelInfoContainer(kernelInfoWithSampler, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({&kernel1, &kernel2})); @@ -966,9 +966,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatch HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenGpgpuWalkerIdOffsetIsProgrammedCorrectly) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; - MockKernel kernel1(program.get(), kernelInfo); + MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - MockKernel kernel2(program.get(), kernelInfoWithSampler); + MockKernel kernel2(program.get(), MockKernel::toKernelInfoContainer(kernelInfoWithSampler, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({&kernel1, &kernel2})); @@ -1011,9 +1011,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatch HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatchingWalkerThenThreadGroupIdStartingCoordinatesAreProgrammedCorrectly) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; - MockKernel kernel1(program.get(), kernelInfo); + MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - MockKernel kernel2(program.get(), kernelInfoWithSampler); + MockKernel kernel2(program.get(), MockKernel::toKernelInfoContainer(kernelInfoWithSampler, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({&kernel1, &kernel2})); @@ -1060,7 +1060,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleKernelsWhenDispatch HWCMDTEST_F(IGFX_GEN8_CORE, DispatchWalkerTest, GivenMultipleDispatchInfoAndSameKernelWhenDispatchingWalkerThenGpgpuWalkerThreadGroupIdStartingCoordinatesAreCorrectlyProgrammed) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); DispatchInfo di1(pClDevice, &kernel, 1, {100, 1, 1}, {10, 1, 1}, {0, 0, 0}, {100, 1, 1}, {10, 1, 1}, {10, 1, 1}, {10, 1, 1}, {0, 0, 0}); @@ -1113,7 +1113,7 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerDisabledWhenAllocationReq DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(0); - MockKernel kernel1(program.get(), kernelInfo); + MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); kernel1.kernelArgRequiresCacheFlush.resize(1); MockGraphicsAllocation cacheRequiringAllocation; @@ -1146,9 +1146,9 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenWalkerWithTwoK DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - MockKernel kernel1(program.get(), kernelInfo); + MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - MockKernel kernel2(program.get(), kernelInfoWithSampler); + MockKernel kernel2(program.get(), MockKernel::toKernelInfoContainer(kernelInfoWithSampler, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); kernel1.kernelArgRequiresCacheFlush.resize(1); @@ -1184,9 +1184,9 @@ HWTEST_F(DispatchWalkerTest, GivenCacheFlushAfterWalkerEnabledWhenTwoWalkersForQ DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(1); - MockKernel kernel1(program.get(), kernelInfo); + MockKernel kernel1(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel1.initialize()); - MockKernel kernel2(program.get(), kernelInfoWithSampler); + MockKernel kernel2(program.get(), MockKernel::toKernelInfoContainer(kernelInfoWithSampler, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel2.initialize()); kernel1.kernelArgRequiresCacheFlush.resize(1); @@ -1249,7 +1249,7 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxToNonAuxWhenTranslationRequiredTh BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice); auto &builder = static_cast &>(baseBuilder); - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); kernelInfo.workloadInfo.workDimOffset = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -1302,7 +1302,7 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenNonAuxToAuxWhenTranslationRequiredTh BuiltinDispatchInfoBuilder &baseBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pClDevice); auto &builder = static_cast &>(baseBuilder); - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); kernelInfo.workloadInfo.workDimOffset = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); diff --git a/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp index a8f1824230..ce1d76488e 100644 --- a/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp @@ -53,7 +53,7 @@ class EnqueueDebugKernelTest : public ProgramSimpleFixture, // create a kernel debugKernel = Kernel::create( pProgram, - *pProgram->getKernelInfo("CopyBuffer", rootDeviceIndex), + pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index 2c5c583f70..cf6356c4ed 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -47,7 +47,7 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreSetThenClEnqueueNDRangeKernel cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); - std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer", rootDeviceIndex), &retVal)); + std::unique_ptr kernel(Kernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); @@ -86,7 +86,7 @@ TEST_F(EnqueueKernelTest, givenKernelWhenNotAllArgsAreSetButSetKernelArgIsCalled cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); - std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer", rootDeviceIndex), &retVal)); + std::unique_ptr kernel(Kernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); @@ -125,7 +125,7 @@ TEST_F(EnqueueKernelTest, givenKernelWhenSetKernelArgIsCalledForEachArgButAtLeas cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); - std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer", rootDeviceIndex), &retVal)); + std::unique_ptr kernel(Kernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); @@ -248,7 +248,7 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreSetThenClEnqueueNDCountKernel pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, true, false).osContext; } - std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer", rootDeviceIndex), &retVal)); + std::unique_ptr kernel(Kernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); @@ -292,7 +292,7 @@ TEST_F(EnqueueKernelTest, givenKernelWhenNotAllArgsAreSetButSetKernelArgIsCalled pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, true, false).osContext; } - std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer", rootDeviceIndex), &retVal)); + std::unique_ptr kernel(Kernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); @@ -336,7 +336,7 @@ TEST_F(EnqueueKernelTest, givenKernelWhenSetKernelArgIsCalledForEachArgButAtLeas pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, true, false).osContext; } - std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer", rootDeviceIndex), &retVal)); + std::unique_ptr kernel(Kernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); auto b0 = clCreateBuffer(context, 0, n * sizeof(float), nullptr, nullptr); @@ -1263,7 +1263,7 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreNotAndEventExistSetThenClEnqu cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); - std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer", rootDeviceIndex), &retVal)); + std::unique_ptr kernel(Kernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_FALSE(kernel->isPatched()); diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp index 1f13bee2c2..3e4c08b47a 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp @@ -736,9 +736,9 @@ TEST_F(EnqueueSvmTest, GivenSvmAllocationWhenEnqueingKernelThenSuccessIsReturned GraphicsAllocation *pSvmAlloc = svmData->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); EXPECT_NE(nullptr, ptrSVM); - std::unique_ptr program(Program::createBuiltInFromSource("FillBufferBytes", context, context->getDevices(), &retVal)); + std::unique_ptr program(Program::createBuiltInFromSource("FillBufferBytes", context, context->getDevices(), &retVal)); program->build(program->getDevices(), nullptr, false); - std::unique_ptr kernel(Kernel::create(program.get(), *program->getKernelInfo("FillBufferBytes", rootDeviceIndex), &retVal)); + std::unique_ptr kernel(Kernel::create(program.get(), program->getKernelInfosForKernel("FillBufferBytes"), &retVal)); kernel->setSvmKernelExecInfo(pSvmAlloc); @@ -764,9 +764,9 @@ TEST_F(EnqueueSvmTest, givenEnqueueTaskBlockedOnUserEventWhenItIsEnqueuedThenSur GraphicsAllocation *pSvmAlloc = svmData->gpuAllocations.getGraphicsAllocation(context->getDevice(0)->getRootDeviceIndex()); EXPECT_NE(nullptr, ptrSVM); - auto program = clUniquePtr(Program::createBuiltInFromSource("FillBufferBytes", context, context->getDevices(), &retVal)); + auto program = clUniquePtr(Program::createBuiltInFromSource("FillBufferBytes", context, context->getDevices(), &retVal)); program->build(program->getDevices(), nullptr, false); - auto kernel = clUniquePtr(Kernel::create(program.get(), *program->getKernelInfo("FillBufferBytes", rootDeviceIndex), &retVal)); + auto kernel = clUniquePtr(Kernel::create(program.get(), program->getKernelInfosForKernel("FillBufferBytes"), &retVal)); std::vector allSurfaces; kernel->getResidency(allSurfaces, rootDeviceIndex); diff --git a/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp b/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp index ce065a9b78..d328bb5521 100644 --- a/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp @@ -119,9 +119,9 @@ HWTEST_F(GetSizeRequiredImageTest, WhenCopyingReadWriteImageThenHeapsAndCommandB auto usedBeforeIOH = ioh.getUsed(); auto usedBeforeSSH = ssh.getUsed(); - std::unique_ptr program(Program::createBuiltInFromSource("CopyImageToImage3d", context, context->getDevices(), nullptr)); + std::unique_ptr program(Program::createBuiltInFromSource("CopyImageToImage3d", context, context->getDevices(), nullptr)); program->build(program->getDevices(), nullptr, false); - std::unique_ptr kernel(Kernel::create(program.get(), *program->getKernelInfo("CopyImageToImage3d", rootDeviceIndex), nullptr)); + std::unique_ptr kernel(Kernel::create(program.get(), program->getKernelInfosForKernel("CopyImageToImage3d"), nullptr)); EXPECT_NE(nullptr, kernel); // This kernel does not operate on OpenCL 2.0 Read and Write images diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp index 42ddd1ac06..e4e1dc30be 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp +++ b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp @@ -724,7 +724,7 @@ TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenConte } TEST_P(PerformanceHintKernelTest, GivenPrivateSurfaceWhenKernelIsInitializedThenContextProvidesProperHint) { - auto pDevice = castToObject(devices[0]); + auto pDevice = castToObject(devices[1]); static_cast(pDevice->getMemoryManager())->turnOnFakingBigAllocations(); for (auto isSmitThread : {false, true}) { diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.h b/opencl/test/unit_test/context/driver_diagnostics_tests.h index 24a80a730e..8b648016e8 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_tests.h +++ b/opencl/test/unit_test/context/driver_diagnostics_tests.h @@ -7,6 +7,7 @@ #pragma once #include "shared/source/helpers/aligned_memory.h" +#include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/mocks/mock_device.h" #include "shared/test/unit_test/test_macros/test_checks_shared.h" @@ -73,9 +74,8 @@ struct PerformanceHintTest : public DriverDiagnosticsTest, void SetUp() override { DriverDiagnosticsTest::SetUp(); - cl_device_id deviceID = devices[0]; cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; - context = Context::create(validProperties, ClDeviceVector(&deviceID, 1), callbackFunction, (void *)userData, retVal); + context = Context::create(validProperties, ClDeviceVector(devices, num_devices), callbackFunction, (void *)userData, retVal); EXPECT_EQ(CL_SUCCESS, retVal); } @@ -230,7 +230,7 @@ struct PerformanceHintEnqueueKernelTest : public PerformanceHintEnqueueTest, CreateProgramFromBinary(context, context->getDevices(), "CopyBuffer_simd32"); retVal = pProgram->build(pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); - kernel = Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer", context->getDevice(0)->getRootDeviceIndex()), &retVal); + kernel = Kernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal); globalWorkGroupSize[0] = globalWorkGroupSize[1] = globalWorkGroupSize[2] = 1; } @@ -265,7 +265,7 @@ struct PerformanceHintEnqueueKernelPrintfTest : public PerformanceHintEnqueueTes CreateProgramFromBinary(context, context->getDevices(), "printf"); retVal = pProgram->build(pProgram->getDevices(), nullptr, false); ASSERT_EQ(CL_SUCCESS, retVal); - kernel = Kernel::create(pProgram, *pProgram->getKernelInfo("test", context->getDevice(0)->getRootDeviceIndex()), &retVal); + kernel = Kernel::create(pProgram, pProgram->getKernelInfosForKernel("test"), &retVal); globalWorkGroupSize[0] = globalWorkGroupSize[1] = globalWorkGroupSize[2] = 1; } @@ -283,6 +283,8 @@ struct PerformanceHintKernelTest : public PerformanceHintTest, public ::testing::WithParamInterface { void SetUp() override { + DebugManager.flags.CreateMultipleRootDevices.set(2); + DebugManager.flags.EnableMultiRootDeviceContexts.set(true); PerformanceHintTest::SetUp(); zeroSized = GetParam(); } @@ -290,5 +292,6 @@ struct PerformanceHintKernelTest : public PerformanceHintTest, void TearDown() override { PerformanceHintTest::TearDown(); } + DebugManagerStateRestore restorer; bool zeroSized = false; }; diff --git a/opencl/test/unit_test/device_queue/device_queue_tests.cpp b/opencl/test/unit_test/device_queue/device_queue_tests.cpp index 2342fedc6f..404b2722b9 100644 --- a/opencl/test/unit_test/device_queue/device_queue_tests.cpp +++ b/opencl/test/unit_test/device_queue/device_queue_tests.cpp @@ -287,11 +287,14 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, WhenDeviceQueueIsCreatedThenDshBuff HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueTest, WhenDispatchingSchedulerThenNoAssertsOccur) { DeviceQueue devQueue; - MockContext context; MockProgram program(toClDeviceVector(*device)); MockCommandQueue cmdQ(nullptr, nullptr, 0); KernelInfo info; - MockSchedulerKernel *kernel = new MockSchedulerKernel(&program, info); + KernelInfoContainer kernelInfos; + auto rootDeviceIndex = device->getRootDeviceIndex(); + kernelInfos.resize(rootDeviceIndex + 1); + kernelInfos[rootDeviceIndex] = &info; + MockSchedulerKernel *kernel = new MockSchedulerKernel(&program, kernelInfos); LinearStream cmdStream; devQueue.dispatchScheduler(cmdStream, *kernel, device->getPreemptionMode(), nullptr, nullptr, false); diff --git a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp index 05bfa93aff..2f738e70de 100644 --- a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp +++ b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp @@ -326,7 +326,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu ASSERT_NE(nullptr, pBlockInfo->patchInfo.executionEnvironment); ASSERT_NE(nullptr, pBlockInfo->patchInfo.threadPayload); - Kernel *blockKernel = Kernel::create(pKernel->getProgram(), *pBlockInfo, nullptr); + Kernel *blockKernel = Kernel::create(pKernel->getProgram(), MockKernel::toKernelInfoContainer(*pBlockInfo, rootDeviceIndex), nullptr); blockSSH = alignUp(blockSSH, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); if (blockKernel->getNumberOfBindingTableStates() > 0) { ASSERT_NE(nullptr, pBlockInfo->patchInfo.bindingTableState); diff --git a/opencl/test/unit_test/fixtures/cl_preemption_fixture.cpp b/opencl/test/unit_test/fixtures/cl_preemption_fixture.cpp index 6fd3a963be..49fc95b9f8 100644 --- a/opencl/test/unit_test/fixtures/cl_preemption_fixture.cpp +++ b/opencl/test/unit_test/fixtures/cl_preemption_fixture.cpp @@ -36,14 +36,14 @@ void DevicePreemptionTests::SetUp() { } const cl_queue_properties properties[3] = {CL_QUEUE_PROPERTIES, 0, 0}; kernelInfo = std::make_unique(); - device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); + device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr, rootDeviceIndex)); context.reset(new MockContext(device.get())); cmdQ.reset(new MockCommandQueue(context.get(), device.get(), properties)); executionEnvironment.reset(new SPatchExecutionEnvironment); memset(executionEnvironment.get(), 0, sizeof(SPatchExecutionEnvironment)); kernelInfo->patchInfo.executionEnvironment = executionEnvironment.get(); program = std::make_unique(toClDeviceVector(*device)); - kernel.reset(new MockKernel(program.get(), *kernelInfo)); + kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex))); dispatchInfo.reset(new DispatchInfo(device.get(), kernel.get(), 1, Vec3(1, 1, 1), Vec3(1, 1, 1), Vec3(0, 0, 0))); ASSERT_NE(nullptr, device); diff --git a/opencl/test/unit_test/fixtures/cl_preemption_fixture.h b/opencl/test/unit_test/fixtures/cl_preemption_fixture.h index 75222a85f3..72fb9da382 100644 --- a/opencl/test/unit_test/fixtures/cl_preemption_fixture.h +++ b/opencl/test/unit_test/fixtures/cl_preemption_fixture.h @@ -53,6 +53,7 @@ class DevicePreemptionTests : public ::testing::Test { std::unique_ptr executionEnvironment; std::unique_ptr program; std::unique_ptr kernelInfo; + const uint32_t rootDeviceIndex = 0u; }; struct ThreadGroupPreemptionEnqueueKernelTest : NEO::PreemptionEnqueueKernelTest { diff --git a/opencl/test/unit_test/fixtures/execution_model_kernel_fixture.h b/opencl/test/unit_test/fixtures/execution_model_kernel_fixture.h index 52f9e5fbfc..032c035f5b 100644 --- a/opencl/test/unit_test/fixtures/execution_model_kernel_fixture.h +++ b/opencl/test/unit_test/fixtures/execution_model_kernel_fixture.h @@ -40,7 +40,7 @@ struct ExecutionModelKernelFixture : public ProgramFromBinaryFixture, // create a kernel pKernel = Kernel::create( pProgram, - *pProgram->getKernelInfo(kernelName, rootDeviceIndex), + pProgram->getKernelInfosForKernel(kernelName), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); diff --git a/opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h b/opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h index fd745471f5..0798edf260 100644 --- a/opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h +++ b/opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h @@ -77,7 +77,7 @@ struct HelloWorldKernelFixture : public ProgramFixture { // create a kernel pKernel = Kernel::create( pProgram, - *pProgram->getKernelInfo(pKernelName->c_str(), pDevice->getRootDeviceIndex()), + pProgram->getKernelInfosForKernel(pKernelName->c_str()), &retVal); EXPECT_NE(nullptr, pKernel); diff --git a/opencl/test/unit_test/fixtures/kernel_arg_fixture.cpp b/opencl/test/unit_test/fixtures/kernel_arg_fixture.cpp index 8a9ea1e4fc..57c7670208 100644 --- a/opencl/test/unit_test/fixtures/kernel_arg_fixture.cpp +++ b/opencl/test/unit_test/fixtures/kernel_arg_fixture.cpp @@ -61,7 +61,7 @@ void KernelImageArgTest::SetUp() { ClDeviceFixture::SetUp(); context.reset(new MockContext(pClDevice)); program = std::make_unique(context.get(), false, toClDeviceVector(*pClDevice)); - pKernel.reset(new MockKernel(program.get(), *pKernelInfo)); + pKernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex))); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setKernelArgHandler(0, &Kernel::setArgImage); diff --git a/opencl/test/unit_test/fixtures/media_kernel_fixture.h b/opencl/test/unit_test/fixtures/media_kernel_fixture.h index bf243cb868..0eac78366d 100644 --- a/opencl/test/unit_test/fixtures/media_kernel_fixture.h +++ b/opencl/test/unit_test/fixtures/media_kernel_fixture.h @@ -76,7 +76,7 @@ struct MediaKernelFixture : public HelloWorldFixture, // create the VME kernel pVmeKernel = Kernel::create( pProgram, - *pProgram->getKernelInfo("device_side_block_motion_estimate_intel", pProgram->getDevices()[0]->getRootDeviceIndex()), + pProgram->getKernelInfosForKernel("device_side_block_motion_estimate_intel"), &retVal); ASSERT_NE(nullptr, pVmeKernel); diff --git a/opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h b/opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h index d5e6768405..6a2014b047 100644 --- a/opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h +++ b/opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h @@ -110,7 +110,7 @@ class SimpleArgKernelFixture : public ProgramFixture { // create a kernel pKernel = Kernel::create( pProgram, - *pProgram->getKernelInfo("SimpleArg", pDevice->getRootDeviceIndex()), + pProgram->getKernelInfosForKernel("SimpleArg"), &retVal); ASSERT_NE(nullptr, pKernel); @@ -156,7 +156,7 @@ class SimpleArgNonUniformKernelFixture : public ProgramFixture { kernel = Kernel::create( pProgram, - *pProgram->getKernelInfo("simpleNonUniform", device->getRootDeviceIndex()), + pProgram->getKernelInfosForKernel("simpleNonUniform"), &retVal); ASSERT_NE(nullptr, kernel); ASSERT_EQ(CL_SUCCESS, retVal); @@ -202,7 +202,7 @@ class SimpleKernelFixture : public ProgramFixture { kernelName.append(std::to_string(i)); kernels[i].reset(Kernel::create( pProgram, - *pProgram->getKernelInfo(kernelName.c_str(), device->getRootDeviceIndex()), + pProgram->getKernelInfosForKernel(kernelName.c_str()), &retVal)); ASSERT_NE(nullptr, kernels[i]); ASSERT_EQ(CL_SUCCESS, retVal); @@ -251,7 +251,7 @@ class SimpleKernelStatelessFixture : public ProgramFixture { kernel.reset(Kernel::create( pProgram, - *pProgram->getKernelInfo("statelessKernel", device->getRootDeviceIndex()), + pProgram->getKernelInfosForKernel("statelessKernel"), &retVal)); ASSERT_NE(nullptr, kernel); ASSERT_EQ(CL_SUCCESS, retVal); @@ -294,7 +294,7 @@ class BindlessKernelFixture : public ProgramFixture { kernel.reset(Kernel::create( pProgram, - *pProgram->getKernelInfo(kernelName.c_str(), deviceCl->getRootDeviceIndex()), + pProgram->getKernelInfosForKernel(kernelName.c_str()), &retVal)); ASSERT_NE(nullptr, kernel); ASSERT_EQ(CL_SUCCESS, retVal); diff --git a/opencl/test/unit_test/gen12lp/gpgpu_walker_tests_gen12lp.cpp b/opencl/test/unit_test/gen12lp/gpgpu_walker_tests_gen12lp.cpp index 90b142b441..93a6914a9b 100644 --- a/opencl/test/unit_test/gen12lp/gpgpu_walker_tests_gen12lp.cpp +++ b/opencl/test/unit_test/gen12lp/gpgpu_walker_tests_gen12lp.cpp @@ -37,7 +37,7 @@ GEN12LPTEST_F(GpgpuWalkerTests, givenMiStoreRegMemWhenAdjustMiStoreRegMemModeThe class MockKernelWithApplicableWa : public MockKernel { public: - MockKernelWithApplicableWa(Program *program, KernelInfo &kernelInfo) : MockKernel(program, kernelInfo) {} + MockKernelWithApplicableWa(Program *program, KernelInfoContainer &kernelInfos) : MockKernel(program, kernelInfos) {} bool requiresWaDisableRccRhwoOptimization(uint32_t rootDeviceIndex) const override { return waApplicable; } @@ -52,7 +52,8 @@ struct HardwareInterfaceTests : public ClDeviceFixture, public LinearStreamFixtu pContext = new NEO::MockContext(pClDevice); pCommandQueue = new MockCommandQueue(pContext, pClDevice, nullptr); pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); - pKernel = new MockKernelWithApplicableWa(static_cast(pProgram), pProgram->mockKernelInfo); + auto kernelInfos = MockKernel::toKernelInfoContainer(pProgram->mockKernelInfo, rootDeviceIndex); + pKernel = new MockKernelWithApplicableWa(pProgram, kernelInfos); } void TearDown() override { diff --git a/opencl/test/unit_test/gtpin/gtpin_tests.cpp b/opencl/test/unit_test/gtpin/gtpin_tests.cpp index eca492b5c3..fe6a60d66f 100644 --- a/opencl/test/unit_test/gtpin/gtpin_tests.cpp +++ b/opencl/test/unit_test/gtpin/gtpin_tests.cpp @@ -2396,7 +2396,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenOnKernelSubitIsCalledThenCo auto pProgramm = std::make_unique(context.get(), false, toClDeviceVector(*pDevice)); std::unique_ptr cmdQ(new MockCommandQueue(context.get(), pDevice, nullptr)); - std::unique_ptr pKernel(new MockKernel(pProgramm.get(), *pKernelInfo)); + std::unique_ptr pKernel(new MockKernel(pProgramm.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex))); pKernel->setSshLocal(nullptr, sizeof(surfaceStateHeap), rootDeviceIndex); diff --git a/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp b/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp index dea1ae9932..fc5c9e0fc8 100644 --- a/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp +++ b/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp @@ -67,7 +67,7 @@ class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(pProgram, *pKernelInfo); + pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); pKernel->setKernelArgHandler(0, &Kernel::setArgBuffer); diff --git a/opencl/test/unit_test/helpers/dispatch_info_tests.cpp b/opencl/test/unit_test/helpers/dispatch_info_tests.cpp index d090829327..1ab386f2ae 100644 --- a/opencl/test/unit_test/helpers/dispatch_info_tests.cpp +++ b/opencl/test/unit_test/helpers/dispatch_info_tests.cpp @@ -39,7 +39,7 @@ class DispatchInfoFixture : public ContextFixture, public ClDeviceFixture { pKernelInfo->patchInfo.pAllocateStatelessPrintfSurface = pPrintfSurface; pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(pProgram, *pKernelInfo); + pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); pKernel->slmTotalSize = 128; } void TearDown() override { diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index 11722424f4..42ca6bedc8 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -183,7 +183,7 @@ HWTEST_F(HardwareCommandsTest, givenSendCrossThreadDataWhenWhenAddPatchInfoComme MockProgram program(&context, false, toClDeviceVector(*pClDevice)); auto kernelInfo = std::make_unique(); - std::unique_ptr kernel(new MockKernel(&program, *kernelInfo)); + std::unique_ptr kernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex))); auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); @@ -252,7 +252,7 @@ HWTEST_F(HardwareCommandsTest, givenSendCrossThreadDataWhenWhenAddPatchInfoComme MockProgram program(&context, false, toClDeviceVector(*pClDevice)); auto kernelInfo = std::make_unique(); - std::unique_ptr kernel(new MockKernel(&program, *kernelInfo)); + std::unique_ptr kernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex))); auto &indirectHeap = cmdQ.getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 8192); indirectHeap.getSpace(128u); @@ -559,7 +559,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0] = 2; modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1] = 1; modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2] = 0; - MockKernel mockKernel(kernel->getProgram(), modifiedKernelInfo, false); + KernelInfoContainer kernelInfos; + kernelInfos.push_back(&modifiedKernelInfo); + MockKernel mockKernel(kernel->getProgram(), kernelInfos, false); uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(mockKernel); @@ -740,7 +742,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh program.setConstantSurface(&gfxConstAlloc); // create kernel - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // setup surface state heap constexpr uint32_t numSurfaces = 5; @@ -858,7 +860,7 @@ HWTEST_F(HardwareCommandsTest, GivenBuffersNotRequiringSshWhenSettingBindingTabl MockProgram program(&context, false, toClDeviceVector(*pClDevice)); // create kernel - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // setup surface state heap char surfaceStateHeap[256]; @@ -913,7 +915,7 @@ HWTEST_F(HardwareCommandsTest, GivenZeroSurfaceStatesWhenSettingBindingTableStat MockProgram program(&context, false, toClDeviceVector(*pClDevice)); // create kernel - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // setup surface state heap char surfaceStateHeap[256]; @@ -1324,7 +1326,7 @@ HWTEST_F(KernelCacheFlushTests, givenLocallyUncachedBufferWhenGettingAllocations DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableCacheFlushAfterWalker.set(-1); - auto kernel = clUniquePtr(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer", rootDeviceIndex), &retVal)); + auto kernel = clUniquePtr(Kernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal)); cl_mem_properties_intel bufferPropertiesUncachedResource[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0}; auto bufferLocallyUncached = clCreateBufferWithPropertiesINTEL(context, bufferPropertiesUncachedResource, 0, 1, nullptr, nullptr); diff --git a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp index 0e88e99398..9ecbdba95e 100644 --- a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp @@ -83,12 +83,12 @@ class CloneKernelFixture : public ContextFixture, public ClDeviceFixture { pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); - pSourceKernel = new MockKernel(pProgram, *pKernelInfo); + pSourceKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pSourceKernel->initialize()); char pSourceCrossThreadData[64] = {}; pSourceKernel->setCrossThreadData(pSourceCrossThreadData, sizeof(pSourceCrossThreadData)); - pClonedKernel = new MockKernel(pProgram, *pKernelInfo); + pClonedKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pClonedKernel->initialize()); char pClonedCrossThreadData[64] = {}; pClonedKernel->setCrossThreadData(pClonedCrossThreadData, sizeof(pClonedCrossThreadData)); diff --git a/opencl/test/unit_test/kernel/kernel_accelerator_arg_tests.cpp b/opencl/test/unit_test/kernel/kernel_accelerator_arg_tests.cpp index 3bb0839167..959e8223e2 100644 --- a/opencl/test/unit_test/kernel/kernel_accelerator_arg_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_accelerator_arg_tests.cpp @@ -61,7 +61,7 @@ class KernelArgAcceleratorFixture : public ContextFixture, public ClDeviceFixtur pKernelInfo->kernelArgInfo[0].offsetVmeSearchPathType = 0x1c; pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(pProgram, *pKernelInfo); + pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setKernelArgHandler(0, &Kernel::setArgAccelerator); diff --git a/opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.cpp b/opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.cpp index 88d41fa009..966d9b4f08 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_buffer_fixture.cpp @@ -52,7 +52,7 @@ void KernelArgBufferFixture::SetUp() { pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(pProgram, *pKernelInfo); + pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); diff --git a/opencl/test/unit_test/kernel/kernel_arg_dev_queue_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_dev_queue_tests.cpp index 7221a9d69c..58ac88fdd5 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_dev_queue_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_dev_queue_tests.cpp @@ -33,7 +33,7 @@ struct KernelArgDevQueueTest : public DeviceHostQueueFixture { pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector.push_back(kernelArgPatchInfo); program = std::make_unique(toClDeviceVector(*pDevice)); - pKernel = new MockKernel(program.get(), *pKernelInfo); + pKernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, testedRootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); uint8_t pCrossThreadData[crossThreadDataSize]; diff --git a/opencl/test/unit_test/kernel/kernel_arg_info_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_info_tests.cpp index be4e787a24..de3d9faaf8 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_info_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_info_tests.cpp @@ -40,7 +40,7 @@ class KernelArgInfoTest : public ProgramFromSourceTest { // create a kernel pKernel = Kernel::create( pProgram, - *pProgram->getKernelInfo(kernelName, rootDeviceIndex), + pProgram->getKernelInfosForKernel(kernelName), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); diff --git a/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp index 43db09eb00..7a51cdbb1a 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_pipe_tests.cpp @@ -56,7 +56,7 @@ class KernelArgPipeFixture : public ContextFixture, public ClDeviceFixture { pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(pProgram, *pKernelInfo); + pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); diff --git a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp index 1b97630e5e..61b89b356f 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp @@ -55,7 +55,7 @@ class KernelArgSvmFixture_ : public ContextFixture, public ClDeviceFixture { pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(pProgram, *pKernelInfo); + pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); } diff --git a/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp b/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp index b2d912d6d2..6a388282ea 100644 --- a/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_immediate_arg_tests.cpp @@ -54,7 +54,7 @@ class KernelArgImmediateTest : public Test { pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = sizeof(T); program = std::make_unique(toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(program.get(), *pKernelInfo); + pKernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); diff --git a/opencl/test/unit_test/kernel/kernel_is_patched_tests.cpp b/opencl/test/unit_test/kernel/kernel_is_patched_tests.cpp index 87143b29dd..0ff95511d8 100644 --- a/opencl/test/unit_test/kernel/kernel_is_patched_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_is_patched_tests.cpp @@ -20,10 +20,10 @@ class PatchedKernelTest : public ::testing::Test { void SetUp() override { device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), rootDeviceIndex)); context.reset(new MockContext(device.get())); - program.reset(Program::createBuiltInFromSource("FillBufferBytes", context.get(), context->getDevices(), &retVal)); + program.reset(Program::createBuiltInFromSource("FillBufferBytes", context.get(), context->getDevices(), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); program->build(program->getDevices(), nullptr, false); - kernel.reset(Kernel::create(program.get(), *program->getKernelInfo("FillBufferBytes", rootDeviceIndex), &retVal)); + kernel.reset(Kernel::create(program.get(), program->getKernelInfosForKernel("FillBufferBytes"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); } void TearDown() override { @@ -33,7 +33,7 @@ class PatchedKernelTest : public ::testing::Test { const uint32_t rootDeviceIndex = 0u; std::unique_ptr context; std::unique_ptr device; - std::unique_ptr program; + std::unique_ptr program; std::unique_ptr kernel; cl_int retVal = CL_SUCCESS; }; diff --git a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp index 0dc2af6156..2b8ea9442a 100644 --- a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp @@ -613,7 +613,7 @@ TEST(KernelReflectionSurfaceTestSingle, GivenNonParentKernelWhenCreatingKernelRe MockClDevice device{new MockDevice}; MockProgram program(toClDeviceVector(device)); KernelInfo info; - MockKernel kernel(&program, info); + MockKernel kernel(&program, MockKernel::toKernelInfoContainer(info, device.getRootDeviceIndex())); EXPECT_FALSE(kernel.isParentKernel); @@ -630,8 +630,10 @@ TEST(KernelReflectionSurfaceTestSingle, GivenNonSchedulerKernelWithForcedSchedul MockClDevice device{new MockDevice}; MockProgram program(toClDeviceVector(device)); + KernelInfoContainer kernelInfos; KernelInfo info; - MockKernel kernel(&program, info); + kernelInfos.push_back(&info); + MockKernel kernel(&program, kernelInfos); EXPECT_FALSE(kernel.isParentKernel); @@ -668,7 +670,9 @@ TEST(KernelReflectionSurfaceTestSingle, GivenNoKernelArgsWhenObtainingKernelRefl bindingTableState.SurfaceStateOffset = 0; info.patchInfo.bindingTableState = &bindingTableState; - MockKernel kernel(&program, info); + KernelInfoContainer kernelInfos; + kernelInfos.push_back(&info); + MockKernel kernel(&program, kernelInfos); EXPECT_TRUE(kernel.isParentKernel); @@ -731,7 +735,9 @@ TEST(KernelReflectionSurfaceTestSingle, GivenDeviceQueueKernelArgWhenObtainingKe info.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = devQueueCurbeOffset; info.kernelArgInfo[0].kernelArgPatchInfoVector[0].size = devQueueCurbeSize; - MockKernel kernel(&program, info); + KernelInfoContainer kernelInfos; + kernelInfos.push_back(&info); + MockKernel kernel(&program, kernelInfos); EXPECT_TRUE(kernel.isParentKernel); @@ -2138,7 +2144,11 @@ TEST_F(KernelReflectionMultiDeviceTest, GivenNoKernelArgsWhenObtainingKernelRefl bindingTableState.SurfaceStateOffset = 0; info.patchInfo.bindingTableState = &bindingTableState; - MockKernel kernel(&program, info); + auto rootDeviceIndex = device1->getRootDeviceIndex(); + KernelInfoContainer kernelInfos; + kernelInfos.resize(rootDeviceIndex + 1); + kernelInfos[rootDeviceIndex] = &info; + MockKernel kernel(&program, kernelInfos); EXPECT_TRUE(kernel.isParentKernel); @@ -2201,7 +2211,11 @@ TEST_F(KernelReflectionMultiDeviceTest, GivenDeviceQueueKernelArgWhenObtainingKe info.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = devQueueCurbeOffset; info.kernelArgInfo[0].kernelArgPatchInfoVector[0].size = devQueueCurbeSize; - MockKernel kernel(&program, info); + auto rootDeviceIndex = device1->getRootDeviceIndex(); + KernelInfoContainer kernelInfos; + kernelInfos.resize(rootDeviceIndex + 1); + kernelInfos[rootDeviceIndex] = &info; + MockKernel kernel(&program, kernelInfos); EXPECT_TRUE(kernel.isParentKernel); diff --git a/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp b/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp index 1d3deccced..45cc29f851 100644 --- a/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_slm_arg_tests.cpp @@ -40,7 +40,7 @@ class KernelSlmArgTest : public Test { pKernelInfo->workloadInfo.slmStaticSize = 3 * KB; program = std::make_unique(toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(program.get(), *pKernelInfo); + pKernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setKernelArgHandler(0, &Kernel::setArgLocal); diff --git a/opencl/test/unit_test/kernel/kernel_slm_tests.cpp b/opencl/test/unit_test/kernel/kernel_slm_tests.cpp index 09b22536da..222b3ee928 100644 --- a/opencl/test/unit_test/kernel/kernel_slm_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_slm_tests.cpp @@ -72,7 +72,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelSLMAndBarrierTest, GivenStaticSlmSizeWhenProgr executionEnvironment.HasBarriers = 1; kernelInfo.workloadInfo.slmStaticSize = GetParam() * KB; - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); // After creating Mock Kernel now create Indirect Heap diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 95bc398795..532ec4ef14 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -67,7 +67,7 @@ class KernelTests : public ProgramFromBinaryFixture { // create a kernel pKernel = Kernel::create( pProgram, - *pProgram->getKernelInfo(kernelName, rootDeviceIndex), + pProgram->getKernelInfosForKernel(kernelName), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); @@ -345,12 +345,12 @@ TEST_F(KernelFromBinaryTests, GivenKernelNumArgsWhenGettingInfoThenNumberOfKerne ASSERT_EQ(CL_SUCCESS, retVal); - auto pKernelInfo = pProgram->getKernelInfo("test", rootDeviceIndex); + auto kernelInfos = pProgram->getKernelInfosForKernel("test"); // create a kernel auto pKernel = Kernel::create( pProgram, - *pKernelInfo, + kernelInfos, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); @@ -383,12 +383,12 @@ TEST_F(KernelFromBinaryTests, WhenRegularKernelIsCreatedThenItIsNotBuiltIn) { ASSERT_EQ(CL_SUCCESS, retVal); - auto pKernelInfo = pProgram->getKernelInfo("simple_kernel_0", rootDeviceIndex); + auto kernelInfos = pProgram->getKernelInfosForKernel("simple_kernel_0"); // create a kernel auto pKernel = Kernel::create( pProgram, - *pKernelInfo, + kernelInfos, &retVal); ASSERT_EQ(CL_SUCCESS, retVal); @@ -556,7 +556,7 @@ TEST_F(KernelPrivateSurfaceTest, WhenChangingResidencyThenCsrResidencySizeIsUpda // create kernel MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); // Test it @@ -594,7 +594,7 @@ TEST_F(KernelPrivateSurfaceTest, givenKernelWithPrivateSurfaceThatIsInUseByGpuWh MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo)); + std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex))); pKernel->initialize(); auto &csr = pDevice->getGpgpuCommandStreamReceiver(); @@ -639,7 +639,7 @@ TEST_F(KernelPrivateSurfaceTest, WhenPrivateSurfaceAllocationFailsThenOutOfResou MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); MemoryManagementFixture::InjectedFunction method = [&](size_t failureIndex) { - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); if (MemoryManagement::nonfailingAllocation == failureIndex) { EXPECT_EQ(CL_SUCCESS, pKernel->initialize()); @@ -683,7 +683,7 @@ TEST_F(KernelPrivateSurfaceTest, given32BitDeviceWhenKernelIsCreatedThenPrivateS // create kernel MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); @@ -717,7 +717,7 @@ HWTEST_F(KernelPrivateSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenPri MockProgram program(&context, false, toClDeviceVector(*pClDevice)); // create kernel - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // setup surface state heap char surfaceStateHeap[0x80]; @@ -765,7 +765,7 @@ TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPriv program.setConstantSurface(&gfxAlloc); // create kernel - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // define stateful path pKernelInfo->usesSsh = false; @@ -807,7 +807,7 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe pKernelInfo->patchInfo.executionEnvironment = executionEnvironment.get(); MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo)); + std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex))); pKernelInfo->gpuPointerSize = 4; pDevice->getMemoryManager()->setForce32BitAllocations(false); if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0) @@ -826,7 +826,7 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe pKernelInfo->patchInfo.executionEnvironment = executionEnvironment.get(); MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo)); + std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex))); pKernelInfo->gpuPointerSize = 4; pDevice->getMemoryManager()->setForce32BitAllocations(true); if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0) @@ -845,7 +845,7 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenPrivateSurfaceTooBigAndGpuPointe pKernelInfo->patchInfo.executionEnvironment = executionEnvironment.get(); MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo)); + std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex))); pKernelInfo->gpuPointerSize = 8; pDevice->getMemoryManager()->setForce32BitAllocations(true); if (pDevice->getDeviceInfo().computeUnitsUsedForScratch == 0) @@ -884,7 +884,7 @@ TEST_F(KernelGlobalSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenGlobalS MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); program.setGlobalSurface(&gfxAlloc); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); pKernel->isBuiltIn = true; @@ -926,7 +926,7 @@ TEST_F(KernelGlobalSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenGlobalS // create kernel MockProgram program(toClDeviceVector(*pClDevice)); program.setGlobalSurface(&gfxAlloc); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); @@ -965,7 +965,7 @@ HWTEST_F(KernelGlobalSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenGlob program.setGlobalSurface(&gfxAlloc); // create kernel - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // setup surface state heap char surfaceStateHeap[0x80]; @@ -1011,7 +1011,7 @@ TEST_F(KernelGlobalSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenGloba program.setGlobalSurface(&gfxAlloc); // create kernel - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // define stateful path pKernelInfo->usesSsh = false; @@ -1056,7 +1056,7 @@ TEST_F(KernelConstantSurfaceTest, givenBuiltInKernelWhenKernelIsCreatedThenConst // create kernel MockProgram program(toClDeviceVector(*pClDevice)); program.setConstantSurface(&gfxAlloc); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); pKernel->isBuiltIn = true; @@ -1098,7 +1098,7 @@ TEST_F(KernelConstantSurfaceTest, givenNDRangeKernelWhenKernelIsCreatedThenConst // create kernel MockProgram program(toClDeviceVector(*pClDevice)); program.setConstantSurface(&gfxAlloc); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); @@ -1137,7 +1137,7 @@ HWTEST_F(KernelConstantSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenCo program.setConstantSurface(&gfxAlloc); // create kernel - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // setup surface state heap char surfaceStateHeap[0x80]; @@ -1183,7 +1183,7 @@ TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenCon program.setConstantSurface(&gfxAlloc); // create kernel - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // define stateful path pKernelInfo->usesSsh = false; @@ -1219,7 +1219,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenK // create kernel MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // setup surface state heap char surfaceStateHeap[0x80]; @@ -1268,7 +1268,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenE // create kernel MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // setup surface state heap char surfaceStateHeap[0x80]; @@ -1311,7 +1311,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenKernelWithNullEvent // create kernel MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // define stateful path pKernelInfo->usesSsh = false; @@ -1349,7 +1349,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhen // create kernel MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // define stateful path pKernelInfo->usesSsh = false; @@ -1385,7 +1385,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhen // create kernel MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // define stateful path pKernelInfo->usesSsh = false; @@ -1423,7 +1423,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe // create kernel MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // setup surface state heap char surfaceStateHeap[0x80]; @@ -1472,7 +1472,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe // create kernel MockProgram program(&context, false, toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // setup surface state heap char surfaceStateHeap[0x80]; @@ -1523,7 +1523,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessK // create kernel MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // define stateful path pKernelInfo->usesSsh = false; @@ -1551,7 +1551,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenKernelWith // create kernel MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // define stateful path pKernelInfo->usesSsh = false; @@ -1589,7 +1589,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessK // create kernel MockProgram program(toClDeviceVector(*pClDevice)); - MockKernel *pKernel = new MockKernel(&program, *pKernelInfo); + MockKernel *pKernel = new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); // define stateful path pKernelInfo->usesSsh = false; @@ -1635,7 +1635,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenKernelIsaIs MockProgram program(toClDeviceVector(*pClDevice)); MockContext ctx; program.setContext(&ctx); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo)); + std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex))); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); @@ -1660,7 +1660,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenExportedFun program.buildInfos[pDevice->getRootDeviceIndex()].exportedFunctionsSurface = exportedFunctionsSurface.get(); MockContext ctx; program.setContext(&ctx); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo)); + std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex))); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size()); @@ -1697,7 +1697,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenGlobalBuffe MockContext ctx; program.setContext(&ctx); program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface = new MockGraphicsAllocation(); - std::unique_ptr pKernel(new MockKernel(&program, *pKernelInfo)); + std::unique_ptr pKernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex))); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size()); @@ -2033,10 +2033,11 @@ TEST(KernelImageDetectionTests, givenKernelWithImagesOnlyWhenItIsAskedIfItHasIma pKernelInfo->kernelArgInfo[1].isMediaBlockImage = true; pKernelInfo->kernelArgInfo[0].isMediaImage = true; - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + const auto rootDeviceIndex = 0u; + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), rootDeviceIndex)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); - auto kernel = clUniquePtr(new MockKernel(program.get(), *pKernelInfo)); + auto kernel = clUniquePtr(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex))); EXPECT_FALSE(kernel->usesOnlyImages()); kernel->initialize(); EXPECT_TRUE(kernel->usesOnlyImages()); @@ -2049,10 +2050,11 @@ TEST(KernelImageDetectionTests, givenKernelWithImagesAndBuffersWhenItIsAskedIfIt pKernelInfo->kernelArgInfo[1].isBuffer = true; pKernelInfo->kernelArgInfo[0].isMediaImage = true; - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + const auto rootDeviceIndex = 0u; + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), rootDeviceIndex)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); - auto kernel = clUniquePtr(new MockKernel(program.get(), *pKernelInfo)); + auto kernel = clUniquePtr(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex))); EXPECT_FALSE(kernel->usesOnlyImages()); kernel->initialize(); EXPECT_FALSE(kernel->usesOnlyImages()); @@ -2063,10 +2065,11 @@ TEST(KernelImageDetectionTests, givenKernelWithNoImagesWhenItIsAskedIfItHasImage pKernelInfo->kernelArgInfo.resize(1); pKernelInfo->kernelArgInfo[0].isBuffer = true; - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + const auto rootDeviceIndex = 0u; + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), rootDeviceIndex)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); - auto kernel = clUniquePtr(new MockKernel(program.get(), *pKernelInfo)); + auto kernel = clUniquePtr(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex))); EXPECT_FALSE(kernel->usesOnlyImages()); kernel->initialize(); EXPECT_FALSE(kernel->usesOnlyImages()); @@ -2120,7 +2123,7 @@ HWTEST_F(KernelResidencyTest, WhenMakingArgsResidentThenImageFromImageCheckIsCor auto program = std::make_unique(toClDeviceVector(*pClDevice)); program->setContext(&context); - std::unique_ptr pKernel(new MockKernel(program.get(), *pKernelInfo)); + std::unique_ptr pKernel(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex))); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->storeKernelArg(0, Kernel::IMAGE_OBJ, (cl_mem)imageY.get(), NULL, 0); @@ -2142,7 +2145,7 @@ struct KernelExecutionEnvironmentTest : public Test { executionEnvironment.CompiledSIMD32 = 1; pKernelInfo->patchInfo.executionEnvironment = &executionEnvironment; - pKernel = new MockKernel(program.get(), *pKernelInfo); + pKernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); } @@ -2299,7 +2302,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenGlobalWorkOffsetIsCorr pKernelInfo->workloadInfo.globalWorkOffsetOffsets[1] = 4; - MockKernel kernel(program.get(), *pKernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkOffsetX); @@ -2312,7 +2315,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkSizeIsCorrect pKernelInfo->workloadInfo.localWorkSizeOffsets[0] = 0xc; - MockKernel kernel(program.get(), *pKernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.localWorkSizeX); @@ -2325,7 +2328,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkSize2IsCorrec pKernelInfo->workloadInfo.localWorkSizeOffsets2[1] = 0xd; - MockKernel kernel(program.get(), *pKernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.localWorkSizeX2); @@ -2338,7 +2341,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenGlobalWorkSizeIsCorrec pKernelInfo->workloadInfo.globalWorkSizeOffsets[2] = 8; - MockKernel kernel(program.get(), *pKernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_EQ(&Kernel::dummyPatchLocation, kernel.globalWorkSizeX); @@ -2351,7 +2354,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenLocalWorkDimIsCorrect) pKernelInfo->workloadInfo.workDimOffset = 12; - MockKernel kernel(program.get(), *pKernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.workDim); @@ -2364,7 +2367,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenNumWorkGroupsIsCorrect pKernelInfo->workloadInfo.numWorkGroupsOffset[1] = 1 * sizeof(uint32_t); pKernelInfo->workloadInfo.numWorkGroupsOffset[2] = 2 * sizeof(uint32_t); - MockKernel kernel(program.get(), *pKernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.numWorkGroupsX); @@ -2379,7 +2382,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedLocalWorkSizeI pKernelInfo->workloadInfo.enqueuedLocalWorkSizeOffsets[0] = 0; - MockKernel kernel(program.get(), *pKernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.enqueuedLocalWorkSizeX); @@ -2392,7 +2395,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedMaxWorkGroupSi pKernelInfo->workloadInfo.maxWorkGroupSizeOffset = 12; - MockKernel kernel(program.get(), *pKernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.maxWorkGroupSizeForCrossThreadData); @@ -2405,7 +2408,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenEnqueuedMaxWorkGroupSi TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenDataParameterSimdSizeIsCorrect) { pKernelInfo->workloadInfo.simdSizeOffset = 16; - MockKernel kernel(program.get(), *pKernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); executionEnvironment.CompiledSIMD32 = false; executionEnvironment.CompiledSIMD16 = true; executionEnvironment.CompiledSIMD8 = true; @@ -2419,7 +2422,7 @@ TEST_F(KernelCrossThreadTests, WhenKernelIsInitializedThenDataParameterSimdSizeI TEST_F(KernelCrossThreadTests, GivenParentEventOffsetWhenKernelIsInitializedThenParentEventIsInitiatedWithInvalid) { pKernelInfo->workloadInfo.parentEventOffset = 16; - MockKernel kernel(program.get(), *pKernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); EXPECT_NE(nullptr, kernel.parentEventOffset); @@ -2431,7 +2434,7 @@ TEST_F(KernelCrossThreadTests, GivenParentEventOffsetWhenKernelIsInitializedThen TEST_F(KernelCrossThreadTests, WhenAddingKernelThenProgramRefCountIsIncremented) { auto refCount = program->getReference(); - MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo); + MockKernel *kernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); auto refCount2 = program->getReference(); EXPECT_EQ(refCount2, refCount + 1); @@ -2444,7 +2447,7 @@ TEST_F(KernelCrossThreadTests, GivenSlmStatisSizeWhenCreatingKernelThenSlmTotalS pKernelInfo->workloadInfo.slmStaticSize = 1024; - MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo); + MockKernel *kernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); EXPECT_EQ(1024u, kernel->slmTotalSize); @@ -2458,7 +2461,7 @@ TEST_F(KernelCrossThreadTests, givenKernelWithPrivateMemoryWhenItIsCreatedThenCu allocatePrivate.PerThreadPrivateMemorySize = 1; pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &allocatePrivate; - MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo); + MockKernel *kernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); kernel->initialize(); @@ -2477,7 +2480,7 @@ TEST_F(KernelCrossThreadTests, givenKernelWithPrivateMemoryWhenItIsCreatedThenCu TEST_F(KernelCrossThreadTests, givenKernelWithPreferredWkgMultipleWhenItIsCreatedThenCurbeIsPatchedProperly) { pKernelInfo->workloadInfo.preferredWkgMultipleOffset = 8; - MockKernel *kernel = new MockKernel(program.get(), *pKernelInfo); + MockKernel *kernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); kernel->initialize(); @@ -2661,10 +2664,11 @@ TEST(KernelTest, givenKernelWithKernelInfoWith32bitPointerSizeThenReport32bit) { KernelInfo info; info.gpuPointerSize = 4; + const auto rootDeviceIndex = 0u; + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr, rootDeviceIndex)); MockContext context; - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(&context, false, toClDeviceVector(*device)); - std::unique_ptr kernel(new MockKernel(&program, info)); + std::unique_ptr kernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(info, rootDeviceIndex))); EXPECT_TRUE(kernel->is32Bit()); } @@ -2673,10 +2677,11 @@ TEST(KernelTest, givenKernelWithKernelInfoWith64bitPointerSizeThenReport64bit) { KernelInfo info; info.gpuPointerSize = 8; + const auto rootDeviceIndex = 0u; + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr, rootDeviceIndex)); MockContext context; - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(&context, false, toClDeviceVector(*device)); - std::unique_ptr kernel(new MockKernel(&program, info)); + std::unique_ptr kernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(info, rootDeviceIndex))); EXPECT_FALSE(kernel->is32Bit()); } @@ -3211,7 +3216,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueHwTest, whenSlbEndOffsetGreaterThanZeroTh using KernelMultiRootDeviceTest = MultiRootDeviceFixture; TEST_F(KernelMultiRootDeviceTest, WhenGettingRootDeviceIndexThenCorrectRootDeviceIndexIsReturned) { - auto kernelInfo = std::make_unique(); + auto pKernelInfo = std::make_unique(); // setup private memory SPatchAllocateStatelessPrivateSurface tokenSPS; @@ -3219,10 +3224,10 @@ TEST_F(KernelMultiRootDeviceTest, WhenGettingRootDeviceIndexThenCorrectRootDevic tokenSPS.DataParamOffset = 40; tokenSPS.DataParamSize = 8; tokenSPS.PerThreadPrivateMemorySize = 112; - kernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; + pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface = &tokenSPS; MockProgram program(context.get(), false, toClDeviceVector(*device1)); - std::unique_ptr kernel(new MockKernel(&program, *kernelInfo)); + std::unique_ptr kernel(new MockKernel(&program, MockKernel::toKernelInfoContainer(*pKernelInfo, device1->getRootDeviceIndex()))); kernel->initialize(); auto privateSurface = kernel->kernelDeviceInfos[device1->getRootDeviceIndex()].privateSurface; @@ -3241,14 +3246,16 @@ TEST(KernelCreateTest, whenInitFailedThenReturnNull) { MockClDevice mDevice{new MockDevice}; } mockProgram; struct MockKernel { - MockKernel(MockProgram *, const KernelInfo &) {} + MockKernel(MockProgram *, const KernelInfoContainer &) {} int initialize() { return -1; }; }; + KernelInfoContainer kernelInfos; KernelInfo info; info.gpuPointerSize = 8; + kernelInfos.push_back(&info); - auto ret = Kernel::create(&mockProgram, info, nullptr); + auto ret = Kernel::create(&mockProgram, kernelInfos, nullptr); EXPECT_EQ(nullptr, ret); } diff --git a/opencl/test/unit_test/kernel/kernel_transformable_tests.cpp b/opencl/test/unit_test/kernel/kernel_transformable_tests.cpp index 0b02cff30c..a37c9c9049 100644 --- a/opencl/test/unit_test/kernel/kernel_transformable_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_transformable_tests.cpp @@ -46,7 +46,7 @@ class KernelTransformableTest : public ::testing::Test { pKernelInfo->argumentsToPatchNum = 4; program = std::make_unique(toClDeviceVector(*context.getDevice(0))); - pKernel.reset(new MockKernel(program.get(), *pKernelInfo)); + pKernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex))); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setKernelArgHandler(0, &Kernel::setArgSampler); diff --git a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp b/opencl/test/unit_test/kernel/parent_kernel_tests.cpp index 6885921549..e1efa862a2 100644 --- a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/parent_kernel_tests.cpp @@ -25,7 +25,7 @@ class MockKernelWithArgumentAccess : public Kernel { class ObjectCountsPublic : public Kernel::ObjectCounts { }; - MockKernelWithArgumentAccess(Program *programArg, const KernelInfo &kernelInfoArg) : Kernel(programArg, kernelInfoArg, false) { + MockKernelWithArgumentAccess(Program *programArg, const KernelInfoContainer &kernelInfoArg) : Kernel(programArg, kernelInfoArg, false) { } void getParentObjectCountsPublic(MockKernelWithArgumentAccess::ObjectCountsPublic &objectCount) { @@ -42,7 +42,7 @@ TEST(ParentKernelTest, WhenArgsAddedThenObjectCountsAreIncremented) { info.patchInfo.executionEnvironment = &environment; - MockKernelWithArgumentAccess kernel(&program, info); + MockKernelWithArgumentAccess kernel(&program, MockKernel::toKernelInfoContainer(info, device->getRootDeviceIndex())); std::vector &args = kernel.getKernelArguments(); diff --git a/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp index b5b4ca0a0a..c594a3fa9b 100644 --- a/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp @@ -70,7 +70,7 @@ class BufferSetArgTest : public ContextFixture, pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(pProgram, *pKernelInfo); + pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); pKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData)); diff --git a/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp b/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp index f4ec5e95a3..0f22c8c3ab 100644 --- a/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp +++ b/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp @@ -79,7 +79,7 @@ class ImageSetArgTest : public ClDeviceFixture, pKernelInfo->kernelArgInfo[0].isImage = true; program = std::make_unique(toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(program.get(), *pKernelInfo); + pKernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); @@ -936,7 +936,7 @@ class ImageMediaBlockSetArgTest : public ImageSetArgTest { pKernelInfo->kernelArgInfo[0].isMediaBlockImage = true; program = std::make_unique(toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(program.get(), *pKernelInfo); + pKernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); diff --git a/opencl/test/unit_test/mocks/mock_kernel.cpp b/opencl/test/unit_test/mocks/mock_kernel.cpp index 1185e04e26..b243dc1a83 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.cpp +++ b/opencl/test/unit_test/mocks/mock_kernel.cpp @@ -36,6 +36,13 @@ void Kernel::ReflectionSurfaceHelper::patchBlocksCurbe(void *reflectionSur template void Kernel::patchReflectionSurface(DeviceQueue *, PrintfHandler *); +const KernelInfoContainer MockKernel::toKernelInfoContainer(const KernelInfo &kernelInfo, uint32_t rootDeviceIndex) { + KernelInfoContainer kernelInfos; + kernelInfos.resize(rootDeviceIndex + 1); + kernelInfos[rootDeviceIndex] = &kernelInfo; + return kernelInfos; +} + bool MockKernel::isPatched() const { return isPatchedOverride; } diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index 300fecd3ee..85e3b9a4d2 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -94,7 +94,7 @@ class MockKernel : public Kernel { } }; - MockKernel(Program *programArg, const KernelInfo &kernelInfoArg, bool scheduler = false) + MockKernel(Program *programArg, const KernelInfoContainer &kernelInfoArg, bool scheduler = false) : Kernel(programArg, kernelInfoArg, scheduler) { } @@ -141,8 +141,11 @@ class MockKernel : public Kernel { info->crossThreadData = new char[crossThreadSize]; - auto kernel = new KernelType(program, *info); auto rootDeviceIndex = device.getRootDeviceIndex(); + KernelInfoContainer kernelInfos; + kernelInfos.resize(rootDeviceIndex + 1); + kernelInfos[rootDeviceIndex] = info; + auto kernel = new KernelType(program, kernelInfos); kernel->kernelDeviceInfos[rootDeviceIndex].crossThreadData = new char[crossThreadSize]; memset(kernel->kernelDeviceInfos[rootDeviceIndex].crossThreadData, 0, crossThreadSize); kernel->kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize = crossThreadSize; @@ -152,6 +155,8 @@ class MockKernel : public Kernel { return kernel; } + static const KernelInfoContainer toKernelInfoContainer(const KernelInfo &kernelInfo, uint32_t rootDeviceIndex); + uint32_t getPatchedArgumentsNum() const { return patchedArgumentsNum; } bool isPatched() const override; @@ -286,9 +291,11 @@ class MockKernelWithInternals { } ClDeviceVector deviceVector; deviceVector.push_back(&deviceArg); + kernelInfos.resize(deviceArg.getRootDeviceIndex() + 1); + kernelInfos[deviceArg.getRootDeviceIndex()] = &kernelInfo; mockProgram = new MockProgram(context, false, deviceVector); - mockKernel = new MockKernel(mockProgram, kernelInfo); + mockKernel = new MockKernel(mockProgram, kernelInfos); mockKernel->setCrossThreadData(&crossThreadData, sizeof(crossThreadData)); mockKernel->setSshLocal(&sshLocal, sizeof(sshLocal), deviceArg.getRootDeviceIndex()); @@ -339,6 +346,7 @@ class MockKernelWithInternals { MockKernel *mockKernel; MockProgram *mockProgram; Context *mockContext; + KernelInfoContainer kernelInfos; KernelInfo kernelInfo; SKernelBinaryHeaderCommon kernelHeader = {}; SPatchThreadPayload threadPayload = {}; @@ -358,14 +366,17 @@ class MockParentKernel : public Kernel { public: using Kernel::auxTranslationRequired; using Kernel::kernelDeviceInfos; - using Kernel::kernelInfo; + using Kernel::kernelInfos; using Kernel::patchBlocksCurbeWithConstantValues; static MockParentKernel *create(Context &context, bool addChildSimdSize = false, bool addChildGlobalMemory = false, bool addChildConstantMemory = false, bool addPrintfForParent = true, bool addPrintfForBlock = true) { auto clDevice = context.getDevice(0); auto rootDeviceIndex = clDevice->getRootDeviceIndex(); + KernelInfoContainer kernelInfos; + kernelInfos.resize(rootDeviceIndex + 1); auto info = new KernelInfo(); + kernelInfos[rootDeviceIndex] = info; const size_t crossThreadSize = 160; uint32_t crossThreadOffset = 0; uint32_t crossThreadOffsetBlock = 0; @@ -427,7 +438,7 @@ class MockParentKernel : public Kernel { UNRECOVERABLE_IF(crossThreadSize < crossThreadOffset + 8); info->crossThreadData = new char[crossThreadSize]; - auto parent = new MockParentKernel(mockProgram, *info); + auto parent = new MockParentKernel(mockProgram, kernelInfos); parent->kernelDeviceInfos[rootDeviceIndex].crossThreadData = new char[crossThreadSize]; memset(parent->kernelDeviceInfos[rootDeviceIndex].crossThreadData, 0, crossThreadSize); parent->kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize = crossThreadSize; @@ -533,31 +544,37 @@ class MockParentKernel : public Kernel { return parent; } - MockParentKernel(Program *programArg, const KernelInfo &kernelInfoArg) : Kernel(programArg, kernelInfoArg, false) { + MockParentKernel(Program *programArg, const KernelInfoContainer &kernelInfoArg) : Kernel(programArg, kernelInfoArg, false) { } ~MockParentKernel() override { - delete kernelInfo.patchInfo.executionEnvironment; - delete kernelInfo.patchInfo.pAllocateStatelessDefaultDeviceQueueSurface; - delete kernelInfo.patchInfo.pAllocateStatelessEventPoolSurface; - delete kernelInfo.patchInfo.pAllocateStatelessPrintfSurface; - delete kernelInfo.patchInfo.threadPayload; - delete &kernelInfo; - BlockKernelManager *blockManager = program->getBlockKernelManager(); + for (auto &pKernelInfo : kernelInfos) { + if (!pKernelInfo) { + continue; + } + auto &kernelInfo = *pKernelInfo; + delete kernelInfo.patchInfo.executionEnvironment; + delete kernelInfo.patchInfo.pAllocateStatelessDefaultDeviceQueueSurface; + delete kernelInfo.patchInfo.pAllocateStatelessEventPoolSurface; + delete kernelInfo.patchInfo.pAllocateStatelessPrintfSurface; + delete kernelInfo.patchInfo.threadPayload; + delete &kernelInfo; + BlockKernelManager *blockManager = program->getBlockKernelManager(); - for (uint32_t i = 0; i < blockManager->getCount(); i++) { - const KernelInfo *blockInfo = blockManager->getBlockKernelInfo(i); - delete blockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface; - delete blockInfo->patchInfo.pAllocateStatelessEventPoolSurface; - delete blockInfo->patchInfo.pAllocateStatelessPrintfSurface; - delete blockInfo->patchInfo.threadPayload; - delete blockInfo->patchInfo.executionEnvironment; - delete blockInfo->patchInfo.dataParameterStream; - delete blockInfo->patchInfo.bindingTableState; - delete blockInfo->patchInfo.interfaceDescriptorData; - delete blockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization; - delete blockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization; - delete[](uint64_t *) blockInfo->heapInfo.pDsh; + for (uint32_t i = 0; i < blockManager->getCount(); i++) { + const KernelInfo *blockInfo = blockManager->getBlockKernelInfo(i); + delete blockInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface; + delete blockInfo->patchInfo.pAllocateStatelessEventPoolSurface; + delete blockInfo->patchInfo.pAllocateStatelessPrintfSurface; + delete blockInfo->patchInfo.threadPayload; + delete blockInfo->patchInfo.executionEnvironment; + delete blockInfo->patchInfo.dataParameterStream; + delete blockInfo->patchInfo.bindingTableState; + delete blockInfo->patchInfo.interfaceDescriptorData; + delete blockInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization; + delete blockInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization; + delete[](uint64_t *) blockInfo->heapInfo.pDsh; + } } if (mockProgram) { mockProgram->decRefInternal(); @@ -578,13 +595,13 @@ class MockParentKernel : public Kernel { class MockSchedulerKernel : public SchedulerKernel { public: - MockSchedulerKernel(Program *programArg, const KernelInfo &kernelInfoArg) : SchedulerKernel(programArg, kernelInfoArg){}; + MockSchedulerKernel(Program *programArg, const KernelInfoContainer &kernelInfoArg) : SchedulerKernel(programArg, kernelInfoArg){}; }; class MockDebugKernel : public MockKernel { public: - MockDebugKernel(Program *program, KernelInfo &kernelInfo) : MockKernel(program, kernelInfo) { - if (!kernelInfo.patchInfo.pAllocateSystemThreadSurface) { + MockDebugKernel(Program *program, KernelInfoContainer &kernelInfos) : MockKernel(program, kernelInfos) { + if (!kernelInfos[0]->patchInfo.pAllocateSystemThreadSurface) { SPatchAllocateSystemThreadSurface *patchToken = new SPatchAllocateSystemThreadSurface; patchToken->BTI = 0; @@ -593,7 +610,7 @@ class MockDebugKernel : public MockKernel { patchToken->Size = sizeof(SPatchAllocateSystemThreadSurface); patchToken->Token = iOpenCL::PATCH_TOKEN_ALLOCATE_SIP_SURFACE; - kernelInfo.patchInfo.pAllocateSystemThreadSurface = patchToken; + const_cast(kernelInfos[0])->patchInfo.pAllocateSystemThreadSurface = patchToken; systemThreadSurfaceAllocated = true; } @@ -601,7 +618,7 @@ class MockDebugKernel : public MockKernel { ~MockDebugKernel() override { if (systemThreadSurfaceAllocated) { - delete kernelInfo.patchInfo.pAllocateSystemThreadSurface; + delete kernelInfos[0]->patchInfo.pAllocateSystemThreadSurface; } } static const uint32_t perThreadSystemThreadSurfaceSize; diff --git a/opencl/test/unit_test/mocks/mock_program.h b/opencl/test/unit_test/mocks/mock_program.h index e04363c1b6..b2adbccdc5 100644 --- a/opencl/test/unit_test/mocks/mock_program.h +++ b/opencl/test/unit_test/mocks/mock_program.h @@ -11,6 +11,7 @@ #include "shared/source/helpers/string.h" #include "opencl/source/cl_device/cl_device.h" +#include "opencl/source/kernel/kernel.h" #include "opencl/source/program/kernel_info.h" #include "opencl/source/program/program.h" @@ -170,6 +171,15 @@ class MockProgram : public Program { Program::initInternalOptions(internalOptions); }; + const KernelInfoContainer getKernelInfosForKernel(const char *kernelName) const { + KernelInfoContainer kernelInfos; + kernelInfos.resize(getMaxRootDeviceIndex() + 1); + for (auto i = 0u; i < kernelInfos.size(); i++) { + kernelInfos[i] = getKernelInfo(kernelName, i); + } + return kernelInfos; + } + std::map processGenBinaryCalledPerRootDevice; std::map replaceDeviceBinaryCalledPerRootDevice; static int initInternalOptionsCalled; diff --git a/opencl/test/unit_test/preemption/preemption_tests.cpp b/opencl/test/unit_test/preemption/preemption_tests.cpp index 1fc38a0c29..79c9481b24 100644 --- a/opencl/test/unit_test/preemption/preemption_tests.cpp +++ b/opencl/test/unit_test/preemption/preemption_tests.cpp @@ -59,7 +59,8 @@ TEST_F(ThreadGroupPreemptionTests, disallowByReadWriteFencesWA) { TEST_F(ThreadGroupPreemptionTests, disallowBySchedulerKernel) { PreemptionFlags flags = {}; - kernel.reset(new MockKernel(program.get(), *kernelInfo, true)); + kernel.reset(new MockKernel(program.get(), + MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex), true)); PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get()); EXPECT_FALSE(PreemptionHelper::allowThreadGroupPreemption(flags)); EXPECT_EQ(PreemptionMode::MidBatch, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags)); @@ -68,7 +69,7 @@ TEST_F(ThreadGroupPreemptionTests, disallowBySchedulerKernel) { TEST_F(ThreadGroupPreemptionTests, disallowByVmeKernel) { PreemptionFlags flags = {}; kernelInfo->isVmeWorkload = true; - kernel.reset(new MockKernel(program.get(), *kernelInfo)); + kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex))); PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get()); EXPECT_FALSE(PreemptionHelper::allowThreadGroupPreemption(flags)); EXPECT_EQ(PreemptionMode::MidBatch, PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags)); @@ -148,7 +149,7 @@ TEST_F(ThreadGroupPreemptionTests, disallowDefaultDeviceModeForValidKernelsInMdi } TEST_F(ThreadGroupPreemptionTests, disallowDefaultDeviceModeWhenAtLeastOneInvalidKernelInMdi) { - MockKernel schedulerKernel(program.get(), *kernelInfo, true); + MockKernel schedulerKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex), true); DispatchInfo schedulerDispatchInfo(device.get(), &schedulerKernel, 1, Vec3(1, 1, 1), Vec3(1, 1, 1), Vec3(0, 0, 0)); PreemptionFlags flags = {}; @@ -183,7 +184,7 @@ TEST_F(MidThreadPreemptionTests, allowMidThreadPreemptionDeviceSupportPreemption device->setPreemptionMode(PreemptionMode::MidThread); device->sharedDeviceInfo.vmeAvcSupportsPreemption = true; kernelInfo->isVmeWorkload = true; - kernel.reset(new MockKernel(program.get(), *kernelInfo)); + kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex))); PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get()); EXPECT_TRUE(PreemptionHelper::allowMidThreadPreemption(flags)); } @@ -210,7 +211,7 @@ TEST_F(MidThreadPreemptionTests, disallowMidThreadPreemptionByVmeKernel) { device->setPreemptionMode(PreemptionMode::MidThread); device->sharedDeviceInfo.vmeAvcSupportsPreemption = false; kernelInfo->isVmeWorkload = true; - kernel.reset(new MockKernel(program.get(), *kernelInfo)); + kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex))); PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get()); EXPECT_FALSE(PreemptionHelper::allowMidThreadPreemption(flags)); } @@ -237,7 +238,7 @@ TEST_F(MidThreadPreemptionTests, taskPreemptionDisallowMidThreadByVmeKernel) { PreemptionFlags flags = {}; kernelInfo->isVmeWorkload = true; device->sharedDeviceInfo.vmeAvcSupportsPreemption = false; - kernel.reset(new MockKernel(program.get(), *kernelInfo)); + kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex))); device->setPreemptionMode(PreemptionMode::MidThread); PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get()); PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags); @@ -258,7 +259,7 @@ TEST_F(MidThreadPreemptionTests, taskPreemptionAllowDeviceSupportsPreemptionOnVm PreemptionFlags flags = {}; executionEnvironment->DisableMidThreadPreemption = 0; kernelInfo->isVmeWorkload = true; - kernel.reset(new MockKernel(program.get(), *kernelInfo)); + kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex))); device->sharedDeviceInfo.vmeAvcSupportsPreemption = true; device->setPreemptionMode(PreemptionMode::MidThread); PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get()); @@ -272,7 +273,7 @@ TEST_F(ThreadGroupPreemptionTests, GivenDebugKernelPreemptionWhenDeviceSupportsT EXPECT_EQ(PreemptionMode::ThreadGroup, device->getPreemptionMode()); PreemptionFlags flags = {}; - kernel.reset(new MockKernel(program.get(), *kernelInfo)); + kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex))); PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get()); PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags); EXPECT_EQ(PreemptionMode::MidThread, outMode); @@ -284,7 +285,7 @@ TEST_F(MidThreadPreemptionTests, GivenDebugKernelPreemptionWhenDeviceSupportsMid EXPECT_EQ(PreemptionMode::MidThread, device->getPreemptionMode()); PreemptionFlags flags = {}; - kernel.reset(new MockKernel(program.get(), *kernelInfo)); + kernel.reset(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex))); PreemptionHelper::setPreemptionLevelFlags(flags, device->getDevice(), kernel.get()); PreemptionMode outMode = PreemptionHelper::taskPreemptionMode(device->getPreemptionMode(), flags); EXPECT_EQ(PreemptionMode::MidBatch, outMode); diff --git a/opencl/test/unit_test/profiling/profiling_tests.cpp b/opencl/test/unit_test/profiling/profiling_tests.cpp index b672ecce4b..13ffe076f1 100644 --- a/opencl/test/unit_test/profiling/profiling_tests.cpp +++ b/opencl/test/unit_test/profiling/profiling_tests.cpp @@ -78,7 +78,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndFor typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(MI_STORE_REGISTER_MEM) + sizeof(GPGPU_WALKER) + HardwareCommandsHelper::getSizeRequiredCS(&kernel); @@ -123,7 +123,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndFor typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); uint64_t requiredSize = 2 * sizeof(PIPE_CONTROL) + 4 * sizeof(MI_STORE_REGISTER_MEM) + HardwareCommandsHelper::getSizeRequiredCS(&kernel); requiredSize += 2 * sizeof(GPGPU_WALKER); @@ -149,7 +149,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfolingWhenWa typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -196,7 +196,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfolingWhenWa } HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingWhenNonBlockedEnqueueIsExecutedThenSubmittedTimestampDoesntHaveGPUTime) { - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -232,7 +232,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProflingWhenWal typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -286,7 +286,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueBlockedWithProfilin typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; @@ -343,7 +343,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueBlockedWithProfilin typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM; typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; - MockKernel kernel(program.get(), kernelInfo); + MockKernel kernel(program.get(), MockKernel::toKernelInfoContainer(kernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, kernel.initialize()); size_t globalOffsets[3] = {0, 0, 0}; diff --git a/opencl/test/unit_test/program/printf_handler_tests.cpp b/opencl/test/unit_test/program/printf_handler_tests.cpp index 2c595dc65d..5ad15c7ec9 100644 --- a/opencl/test/unit_test/program/printf_handler_tests.cpp +++ b/opencl/test/unit_test/program/printf_handler_tests.cpp @@ -31,7 +31,7 @@ TEST(PrintfHandlerTest, givenNotPreparedPrintfHandlerWhenGetSurfaceIsCalledThenR pKernelInfo->patchInfo.pAllocateStatelessPrintfSurface = pPrintfSurface; MockProgram *pProgram = new MockProgram(&context, false, toClDeviceVector(*device)); - MockKernel *pKernel = new MockKernel(pProgram, *pKernelInfo); + MockKernel *pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, device->getRootDeviceIndex())); MockMultiDispatchInfo multiDispatchInfo(device, pKernel); PrintfHandler *printfHandler = PrintfHandler::create(multiDispatchInfo, *device); @@ -59,7 +59,8 @@ TEST(PrintfHandlerTest, givenPreparedPrintfHandlerWhenGetSurfaceIsCalledThenResu MockProgram *pProgram = new MockProgram(&context, false, toClDeviceVector(*device)); uint64_t crossThread[10]; - MockKernel *pKernel = new MockKernel(pProgram, *pKernelInfo); + MockKernel *pKernel = new MockKernel(pProgram, + MockKernel::toKernelInfoContainer(*pKernelInfo, device->getRootDeviceIndex())); pKernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); MockMultiDispatchInfo multiDispatchInfo(device, pKernel); @@ -127,9 +128,9 @@ TEST(PrintfHandlerTest, givenMultiDispatchInfoWithMultipleKernelsWhenCreatingAnd mainKernelInfo->patchInfo.pAllocateStatelessPrintfSurface = printfSurface.get(); uint64_t crossThread[8]; - auto mainKernel = std::make_unique(program.get(), *mainKernelInfo); - auto kernel1 = std::make_unique(program.get(), *kernelInfo); - auto kernel2 = std::make_unique(program.get(), *kernelInfo); + auto mainKernel = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*mainKernelInfo, device->getRootDeviceIndex())); + auto kernel1 = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, device->getRootDeviceIndex())); + auto kernel2 = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, device->getRootDeviceIndex())); mainKernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); DispatchInfo mainDispatchInfo(device.get(), mainKernel.get(), 1, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}); @@ -198,7 +199,7 @@ TEST(PrintfHandlerTest, GivenAllocationInLocalMemoryWhichRequiresBlitterWhenPrep auto program = std::make_unique(&context, false, toClDeviceVector(*pClDevice)); uint64_t crossThread[10]; - auto kernel = std::make_unique(program.get(), *kernelInfo); + auto kernel = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, pClDevice->getRootDeviceIndex())); kernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); MockMultiDispatchInfo multiDispatchInfo(pClDevice.get(), kernel.get()); @@ -227,7 +228,7 @@ TEST_F(PrintfHandlerMultiRootDeviceTests, printfSurfaceHasCorrectRootDeviceIndex auto program = std::make_unique(context.get(), false, toClDeviceVector(*device1)); uint64_t crossThread[10]; - auto kernel = std::make_unique(program.get(), *kernelInfo); + auto kernel = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, device1->getRootDeviceIndex())); kernel->setCrossThreadData(&crossThread, sizeof(uint64_t) * 8); MockMultiDispatchInfo multiDispatchInfo(device1, kernel.get()); diff --git a/opencl/test/unit_test/program/printf_helper_tests.cpp b/opencl/test/unit_test/program/printf_helper_tests.cpp index d100de9459..9ca77e4674 100644 --- a/opencl/test/unit_test/program/printf_helper_tests.cpp +++ b/opencl/test/unit_test/program/printf_helper_tests.cpp @@ -51,8 +51,9 @@ class PrintFormatterTest : public testing::Test { kernelInfo = std::make_unique(); device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; + auto rootDeviceIndex = device->getRootDeviceIndex(); program = std::make_unique(toClDeviceVector(*device)); - kernel = new MockKernel(program.get(), *kernelInfo); + kernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, rootDeviceIndex)); printFormatter = std::unique_ptr(new PrintFormatter(static_cast(data->getUnderlyingBuffer()), printfBufferSize, is32bit, kernelInfo->patchInfo.stringDataMap)); diff --git a/opencl/test/unit_test/program/program_nonuniform.cpp b/opencl/test/unit_test/program/program_nonuniform.cpp index 3017a36f47..510eae8da5 100644 --- a/opencl/test/unit_test/program/program_nonuniform.cpp +++ b/opencl/test/unit_test/program/program_nonuniform.cpp @@ -106,12 +106,12 @@ TEST(KernelNonUniform, WhenSettingAllowNonUniformThenGettingAllowNonUniformRetur MockClDevice device{new MockDevice()}; MockProgram program(toClDeviceVector(device)); struct KernelMock : Kernel { - KernelMock(Program *p, KernelInfo &ki) - : Kernel(p, ki, false) { + KernelMock(Program *program, KernelInfoContainer &kernelInfos) + : Kernel(program, kernelInfos, false) { } }; - - KernelMock k{&program, kernelInfo}; + auto kernelInfos = MockKernel::toKernelInfoContainer(kernelInfo, device.getRootDeviceIndex()); + KernelMock k{&program, kernelInfos}; program.setAllowNonUniform(false); EXPECT_FALSE(k.getAllowNonUniform()); program.setAllowNonUniform(true); @@ -201,7 +201,8 @@ TEST_F(ProgramNonUniformTest, GivenCl21WhenExecutingKernelWithNonUniformThenEnqu EXPECT_NE(nullptr, pKernelInfo); // create a kernel - auto pKernel = Kernel::create(mockProgram, *pKernelInfo, &retVal); + auto pKernel = Kernel::create(mockProgram, + MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); @@ -240,7 +241,8 @@ TEST_F(ProgramNonUniformTest, GivenCl20WhenExecutingKernelWithNonUniformThenEnqu EXPECT_NE(nullptr, pKernelInfo); // create a kernel - auto pKernel = Kernel::create(mockProgram, *pKernelInfo, &retVal); + auto pKernel = Kernel::create(mockProgram, + MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); @@ -277,7 +279,8 @@ TEST_F(ProgramNonUniformTest, GivenCl12WhenExecutingKernelWithNonUniformThenInva EXPECT_NE(nullptr, pKernelInfo); // create a kernel - auto pKernel = Kernel::create(mockProgram, *pKernelInfo, &retVal); + auto pKernel = Kernel::create(mockProgram, + MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); diff --git a/opencl/test/unit_test/program/program_tests.cpp b/opencl/test/unit_test/program/program_tests.cpp index a406940ae3..5f7fdb96f0 100644 --- a/opencl/test/unit_test/program/program_tests.cpp +++ b/opencl/test/unit_test/program/program_tests.cpp @@ -1349,7 +1349,8 @@ HWTEST_F(PatchTokenTests, givenKernelRequiringConstantAllocationWhenMakeResident EXPECT_EQ(expected_values[0], constBuff[0]); EXPECT_EQ(expected_values[1], constBuff[1]); - std::unique_ptr pKernel(Kernel::create(pProgram, *pKernelInfo, &retVal)); + std::unique_ptr pKernel(Kernel::create(pProgram, + MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), &retVal)); ASSERT_EQ(CL_SUCCESS, retVal); ASSERT_NE(nullptr, pKernel); @@ -1458,7 +1459,7 @@ TEST_F(PatchTokenTests, WhenBuildingProgramThenConstantKernelArgsAreAvailable) { auto pKernel = Kernel::create( pProgram, - *pKernelInfo, + MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), &retVal); ASSERT_EQ(CL_SUCCESS, retVal); @@ -1498,7 +1499,7 @@ TEST_F(PatchTokenTests, GivenVmeKernelWhenBuildingKernelThenArgAvailable) { auto pKernel = Kernel::create( pProgram, - *pKernelInfo, + MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), &retVal); ASSERT_NE(nullptr, pKernel); diff --git a/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp b/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp index f613d09dac..f2fa083c1e 100644 --- a/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp +++ b/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp @@ -55,7 +55,7 @@ class SamplerSetArgFixture : public ClDeviceFixture { pKernelInfo->kernelArgInfo[1].isSampler = true; program = std::make_unique(toClDeviceVector(*pClDevice)); - pKernel = new MockKernel(program.get(), *pKernelInfo); + pKernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_NE(nullptr, pKernel); ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); @@ -200,7 +200,7 @@ HWTEST_F(SamplerSetArgTest, GivenSamplerObjectWhenSetKernelArgIsCalledThenSample } HWTEST_F(SamplerSetArgTest, GivenSamplerObjectWhenSetKernelArgIsCalledAndKernelIsDeletedThenRefCountIsUnchanged) { - auto myKernel = std::make_unique(program.get(), *pKernelInfo); + auto myKernel = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_NE(nullptr, myKernel.get()); ASSERT_EQ(CL_SUCCESS, myKernel->initialize()); @@ -374,7 +374,7 @@ TEST_F(SamplerSetArgTest, givenSamplerTypeStrAndIsSamplerTrueWhenInitializeKerne pKernelInfo->kernelArgInfo[1].metadataExtended->type = "sampler"; pKernelInfo->kernelArgInfo[1].isSampler = true; - auto pMockKernell = std::make_unique(program.get(), *pKernelInfo); + auto pMockKernell = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pMockKernell->initialize()); EXPECT_EQ(pMockKernell->getKernelArguments()[0].type, MockKernel::SAMPLER_OBJ); EXPECT_EQ(pMockKernell->getKernelArguments()[1].type, MockKernel::SAMPLER_OBJ); @@ -389,7 +389,7 @@ TEST_F(SamplerSetArgTest, givenSamplerTypeStrAndAndIsSamplerFalseWhenInitializeK pKernelInfo->kernelArgInfo[1].metadataExtended->type = "sampler"; pKernelInfo->kernelArgInfo[1].isSampler = false; - auto pMockKernell = std::make_unique(program.get(), *pKernelInfo); + auto pMockKernell = std::make_unique(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex)); ASSERT_EQ(CL_SUCCESS, pMockKernell->initialize()); EXPECT_NE(pMockKernell->getKernelArguments()[0].type, MockKernel::SAMPLER_OBJ); EXPECT_NE(pMockKernell->getKernelArguments()[1].type, MockKernel::SAMPLER_OBJ); diff --git a/opencl/test/unit_test/scheduler/scheduler_kernel_tests.cpp b/opencl/test/unit_test/scheduler/scheduler_kernel_tests.cpp index acad5c8912..62df2fec4e 100644 --- a/opencl/test/unit_test/scheduler/scheduler_kernel_tests.cpp +++ b/opencl/test/unit_test/scheduler/scheduler_kernel_tests.cpp @@ -26,7 +26,7 @@ using namespace NEO; class MockSchedulerKernel : public SchedulerKernel { public: - MockSchedulerKernel(Program *program, const KernelInfo &info) : SchedulerKernel(program, info) { + MockSchedulerKernel(Program *program, const KernelInfoContainer &info) : SchedulerKernel(program, info) { } static MockSchedulerKernel *create(Program &program, KernelInfo *&info) { @@ -52,7 +52,12 @@ class MockSchedulerKernel : public SchedulerKernel { info->kernelArgInfo.push_back(std::move(bufferArg)); } - MockSchedulerKernel *mock = Kernel::create(&program, *info, nullptr); + KernelInfoContainer kernelInfos; + auto rootDeviceIndex = program.getDevices()[0]->getRootDeviceIndex(); + kernelInfos.resize(rootDeviceIndex + 1); + kernelInfos[rootDeviceIndex] = info; + + MockSchedulerKernel *mock = Kernel::create(&program, kernelInfos, nullptr); return mock; } }; @@ -61,7 +66,9 @@ TEST(SchedulerKernelTest, WhenSchedulerKernelIsCreatedThenLwsIs24) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); KernelInfo info; - MockSchedulerKernel kernel(&program, info); + KernelInfoContainer kernelInfos; + kernelInfos.push_back(&info); + MockSchedulerKernel kernel(&program, kernelInfos); size_t lws = kernel.getLws(); EXPECT_EQ((size_t)24u, lws); @@ -71,7 +78,9 @@ TEST(SchedulerKernelTest, WhenSchedulerKernelIsCreatedThenGwsIs24) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); KernelInfo info; - MockSchedulerKernel kernel(&program, info); + KernelInfoContainer kernelInfos; + kernelInfos.push_back(&info); + MockSchedulerKernel kernel(&program, kernelInfos); const size_t hwThreads = 3; const size_t simdSize = 8; @@ -87,7 +96,9 @@ TEST(SchedulerKernelTest, WhenSettingGwsThenGetGwsReturnedSetValue) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); KernelInfo info; - MockSchedulerKernel kernel(&program, info); + KernelInfoContainer kernelInfos; + kernelInfos.push_back(&info); + MockSchedulerKernel kernel(&program, kernelInfos); kernel.setGws(24); @@ -109,7 +120,9 @@ TEST(SchedulerKernelTest, WhenSchedulerKernelIsCreatedThenCurbeSizeIsCorrect) { info.patchInfo.dataParameterStream = &dataParameterStream; info.heapInfo.DynamicStateHeapSize = dshSize; - MockSchedulerKernel kernel(&program, info); + KernelInfoContainer kernelInfos; + kernelInfos.push_back(&info); + MockSchedulerKernel kernel(&program, kernelInfos); uint32_t expectedCurbeSize = alignUp(crossTrheadDataSize, 64) + alignUp(dshSize, 64) + alignUp(SCHEDULER_DYNAMIC_PAYLOAD_SIZE, 64); EXPECT_GE((size_t)expectedCurbeSize, kernel.getCurbeSize()); @@ -270,7 +283,9 @@ TEST(SchedulerKernelTest, GivenNullKernelInfoWhenGettingCurbeSizeThenSizeIsCorre info.patchInfo.dataParameterStream = nullptr; - MockSchedulerKernel kernel(&program, info); + KernelInfoContainer kernelInfos; + kernelInfos.push_back(&info); + MockSchedulerKernel kernel(&program, kernelInfos); uint32_t expectedCurbeSize = alignUp(SCHEDULER_DYNAMIC_PAYLOAD_SIZE, 64); EXPECT_GE((size_t)expectedCurbeSize, kernel.getCurbeSize()); @@ -283,7 +298,9 @@ TEST(SchedulerKernelTest, givenForcedSchedulerGwsByDebugVariableWhenSchedulerKer auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); KernelInfo info; - MockSchedulerKernel kernel(&program, info); + KernelInfoContainer kernelInfos; + kernelInfos.push_back(&info); + MockSchedulerKernel kernel(&program, kernelInfos); size_t gws = kernel.getGws(); EXPECT_EQ(static_cast(48u), gws); @@ -297,7 +314,9 @@ TEST(SchedulerKernelTest, givenSimulationModeWhenSchedulerKernelIsCreatedThenGws MockProgram program(toClDeviceVector(*device)); KernelInfo info; - MockSchedulerKernel kernel(&program, info); + KernelInfoContainer kernelInfos; + kernelInfos.push_back(&info); + MockSchedulerKernel kernel(&program, kernelInfos); size_t gws = kernel.getGws(); EXPECT_EQ(static_cast(24u), gws); } @@ -313,7 +332,9 @@ TEST(SchedulerKernelTest, givenForcedSchedulerGwsByDebugVariableAndSimulationMod MockProgram program(toClDeviceVector(*device)); KernelInfo info; - MockSchedulerKernel kernel(&program, info); + KernelInfoContainer kernelInfos; + kernelInfos.push_back(&info); + MockSchedulerKernel kernel(&program, kernelInfos); size_t gws = kernel.getGws(); EXPECT_EQ(static_cast(48u), gws); } diff --git a/opencl/test/unit_test/utilities/file_logger_tests.cpp b/opencl/test/unit_test/utilities/file_logger_tests.cpp index 35304c1bb1..2afa1e3115 100644 --- a/opencl/test/unit_test/utilities/file_logger_tests.cpp +++ b/opencl/test/unit_test/utilities/file_logger_tests.cpp @@ -348,7 +348,7 @@ TEST(FileLogger, GivenDebugFunctionalityWhenDebugFlagIsDisabledThenDoNotDumpKern auto kernelInfo = std::make_unique(); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); - auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo)); + auto kernel = std::unique_ptr(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex))); auto multiDispatchInfo = std::unique_ptr(new MockMultiDispatchInfo(device.get(), kernel.get())); KernelArgPatchInfo kernelArgPatchInfo; @@ -383,7 +383,7 @@ TEST(FileLogger, GivenMdiWhenDumpingKernelArgsThenFileIsCreated) { auto kernelInfo = std::make_unique(); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); - auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo)); + auto kernel = std::unique_ptr(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex))); auto multiDispatchInfo = std::unique_ptr(new MockMultiDispatchInfo(device.get(), kernel.get())); KernelArgPatchInfo kernelArgPatchInfo; @@ -428,7 +428,7 @@ TEST(FileLogger, GivenEmptyKernelWhenDumpingKernelArgsThenFileIsNotCreated) { auto kernelInfo = std::make_unique(); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); - auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo)); + auto kernel = std::unique_ptr(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex))); std::string testFile = "testfile"; DebugVariables flags; @@ -444,7 +444,7 @@ TEST(FileLogger, GivenImmediateWhenDumpingKernelArgsThenFileIsCreated) { auto kernelInfo = std::make_unique(); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); - auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo)); + auto kernel = std::unique_ptr(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex))); KernelArgPatchInfo kernelArgPatchInfo; @@ -478,7 +478,7 @@ TEST(FileLogger, GivenImmediateZeroSizeWhenDumpingKernelArgsThenFileIsNotCreated auto kernelInfo = std::make_unique(); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); - auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo)); + auto kernel = std::unique_ptr(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex))); KernelArgPatchInfo kernelArgPatchInfo; @@ -508,7 +508,7 @@ TEST(FileLogger, GivenLocalBufferWhenDumpingKernelArgsThenFileIsNotCreated) { auto kernelInfo = std::make_unique(); auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); MockProgram program(toClDeviceVector(*device)); - auto kernel = std::unique_ptr(new MockKernel(&program, *kernelInfo)); + auto kernel = std::unique_ptr(new MockKernel(&program, MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex))); KernelArgPatchInfo kernelArgPatchInfo; @@ -531,7 +531,7 @@ TEST(FileLogger, GivenBufferNotSetWhenDumpingKernelArgsThenFileIsNotCreated) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); - auto kernel = clUniquePtr(new MockKernel(program.get(), *kernelInfo)); + auto kernel = clUniquePtr(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex))); KernelArgPatchInfo kernelArgPatchInfo; @@ -565,7 +565,7 @@ TEST(FileLogger, GivenBufferWhenDumpingKernelArgsThenFileIsCreated) { auto kernelInfo = std::make_unique(); auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); - auto kernel = clUniquePtr(new MockKernel(program.get(), *kernelInfo)); + auto kernel = clUniquePtr(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex))); KernelArgPatchInfo kernelArgPatchInfo; @@ -604,7 +604,7 @@ TEST(FileLogger, GivenSamplerWhenDumpingKernelArgsThenFileIsNotCreated) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); - auto kernel = clUniquePtr(new MockKernel(program.get(), *kernelInfo)); + auto kernel = clUniquePtr(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex))); KernelArgPatchInfo kernelArgPatchInfo; @@ -632,7 +632,7 @@ TEST(FileLogger, GivenImageNotSetWhenDumpingKernelArgsThenFileIsNotCreated) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto context = clUniquePtr(new MockContext(device.get())); auto program = clUniquePtr(new MockProgram(context.get(), false, toClDeviceVector(*device))); - auto kernel = clUniquePtr(new MockKernel(program.get(), *kernelInfo)); + auto kernel = clUniquePtr(new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*kernelInfo, mockRootDeviceIndex))); char surfaceStateHeap[0x80]; kernelInfo->heapInfo.pSsh = surfaceStateHeap;