From c8d1e082dd03e4eced330a6c0a754083795af005 Mon Sep 17 00:00:00 2001 From: Mateusz Jablonski Date: Mon, 7 Dec 2020 14:41:52 +0000 Subject: [PATCH] Update getKernelInfo method add root device index parameter to return proper kernel info Related-To: NEO-5001 Signed-off-by: Mateusz Jablonski --- opencl/source/api/api.cpp | 16 +++--- .../source/built_ins/vme_dispatch_builder.h | 57 ++++++++++--------- opencl/source/command_queue/command_queue.cpp | 8 +-- opencl/source/command_queue/enqueue_common.h | 11 ++-- opencl/source/command_queue/enqueue_kernel.h | 17 +++--- .../command_queue/gpgpu_walker_bdw_plus.inl | 13 +++-- .../hardware_interface_bdw_plus.inl | 9 +-- .../source/command_queue/local_work_size.cpp | 10 ++-- .../aub_command_stream_receiver_hw_base.inl | 2 +- .../tbx_command_stream_receiver_hw.inl | 2 +- opencl/source/gen8/gpgpu_walker_gen8.cpp | 6 +- opencl/source/gen9/gpgpu_walker_gen9.cpp | 6 +- opencl/source/gtpin/gtpin_callbacks.cpp | 2 +- opencl/source/helpers/dispatch_info.cpp | 2 +- .../source/helpers/hardware_commands_helper.h | 13 +++-- .../helpers/hardware_commands_helper_base.inl | 44 +++++++------- .../hardware_commands_helper_bdw_plus.inl | 12 ++-- opencl/source/helpers/task_information.cpp | 7 ++- opencl/source/kernel/kernel.cpp | 29 +++++----- opencl/source/kernel/kernel.h | 10 ++-- opencl/source/kernel/kernel_extra.cpp | 2 +- opencl/source/program/kernel_info.cpp | 9 +-- opencl/source/program/printf_handler.cpp | 8 +-- opencl/source/utilities/logger.cpp | 8 +-- ...te_program_with_built_in_kernels_tests.cpp | 12 ++-- ...cl_get_kernel_sub_group_info_khr_tests.inl | 8 +-- .../cl_get_kernel_sub_group_info_tests.inl | 18 +++--- .../cl_get_kernel_work_group_info_tests.inl | 2 +- ...cl_mem_locally_uncached_resource_tests.cpp | 2 +- .../enqueue_kernel_aub_tests.cpp | 10 ++-- .../unit_test/built_ins/built_in_tests.cpp | 42 +++++++------- .../command_queue/command_enqueue_fixture.h | 12 ++-- .../command_queue/dispatch_walker_tests.cpp | 2 +- 
.../enqueue_copy_buffer_rect_tests.cpp | 4 +- .../enqueue_copy_buffer_tests.cpp | 4 +- .../enqueue_debug_kernel_tests.cpp | 4 +- .../enqueue_fill_buffer_tests.cpp | 10 ++-- .../command_queue/enqueue_kernel_1_tests.cpp | 2 +- .../command_queue/enqueue_kernel_2_tests.cpp | 4 +- .../enqueue_read_buffer_rect_tests.cpp | 14 +++-- .../enqueue_svm_mem_copy_tests.cpp | 2 +- .../enqueue_svm_mem_fill_tests.cpp | 2 +- .../enqueue_write_buffer_rect_tests.cpp | 13 +++-- .../get_size_required_buffer_tests.cpp | 4 +- .../get_size_required_image_tests.cpp | 18 +++--- .../enqueue_kernel_gl_tests_windows.cpp | 2 +- .../driver_diagnostics_enqueue_tests.cpp | 18 +++--- .../context/driver_diagnostics_tests.cpp | 13 +++-- .../context/driver_diagnostics_tests.h | 2 + .../device_queue/device_queue_hw_tests.cpp | 4 +- opencl/test/unit_test/event/event_tests.cpp | 2 +- .../enqueue_execution_model_kernel_tests.cpp | 8 +-- .../parent_kernel_dispatch_tests.cpp | 8 +-- .../scheduler_dispatch_tests.cpp | 4 +- opencl/test/unit_test/gtpin/gtpin_tests.cpp | 18 +++--- .../hardware_commands_helper_tests.cpp | 52 ++++++++--------- .../helpers/hardware_commands_helper_tests.h | 2 +- .../helpers/task_information_tests.cpp | 3 +- .../unit_test/kernel/clone_kernel_tests.cpp | 18 +++--- .../kernel/kernel_image_arg_tests.cpp | 8 +-- .../kernel_reflection_surface_tests.cpp | 10 ++-- .../unit_test/kernel/kernel_slm_tests.cpp | 2 +- opencl/test/unit_test/kernel/kernel_tests.cpp | 12 ++-- .../unit_test/kernel/parent_kernel_tests.cpp | 7 ++- .../memory_manager/memory_manager_tests.cpp | 7 ++- .../test/unit_test/program/program_tests.cpp | 2 +- ...d_write_buffer_scenarios_windows_tests.cpp | 13 +++-- shared/source/command_stream/preemption.cpp | 9 +-- shared/test/unit_test/cmd_parse/hw_parse.inl | 2 +- 69 files changed, 381 insertions(+), 337 deletions(-) diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index 68b4740e27..7fae08722d 100644 --- a/opencl/source/api/api.cpp +++ 
b/opencl/source/api/api.cpp @@ -1845,7 +1845,7 @@ cl_int CL_API_CALL clSetKernelArg(cl_kernel kernel, retVal = CL_INVALID_KERNEL; break; } - if (pKernel->getKernelInfo().kernelArgInfo.size() <= argIndex) { + if (pKernel->getDefaultKernelInfo().kernelArgInfo.size() <= argIndex) { retVal = CL_INVALID_ARG_INDEX; break; } @@ -4757,12 +4757,14 @@ cl_int CL_API_CALL clSetKernelArgSVMPointer(cl_kernel kernel, return retVal; } - cl_int kernelArgAddressQualifier = asClKernelArgAddressQualifier(pKernel->getKernelInfo().kernelArgInfo[argIndex].metadata.getAddressQualifier()); - if ((kernelArgAddressQualifier != CL_KERNEL_ARG_ADDRESS_GLOBAL) && - (kernelArgAddressQualifier != CL_KERNEL_ARG_ADDRESS_CONSTANT)) { - retVal = CL_INVALID_ARG_VALUE; - TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); - return retVal; + for (const auto &pDevice : pKernel->getDevices()) { + cl_int kernelArgAddressQualifier = asClKernelArgAddressQualifier(pKernel->getKernelInfo(pDevice->getRootDeviceIndex()).kernelArgInfo[argIndex].metadata.getAddressQualifier()); + if ((kernelArgAddressQualifier != CL_KERNEL_ARG_ADDRESS_GLOBAL) && + (kernelArgAddressQualifier != CL_KERNEL_ARG_ADDRESS_CONSTANT)) { + retVal = CL_INVALID_ARG_VALUE; + TRACING_EXIT(clSetKernelArgSVMPointer, &retVal); + return retVal; + } } GraphicsAllocation *pSvmAlloc = nullptr; diff --git a/opencl/source/built_ins/vme_dispatch_builder.h b/opencl/source/built_ins/vme_dispatch_builder.h index 467eabd5d3..56dbd73b8a 100644 --- a/opencl/source/built_ins/vme_dispatch_builder.h +++ b/opencl/source/built_ins/vme_dispatch_builder.h @@ -27,15 +27,16 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder { populate(builtinOp, mediaKernelsBuildOptions, kernelName, vmeKernel); - widthArgNum = vmeKernel->getKernelInfo().getArgNumByName("width"); - heightArgNum = vmeKernel->getKernelInfo().getArgNumByName("height"); - strideArgNum = vmeKernel->getKernelInfo().getArgNumByName("stride"); - acceleratorArgNum = 
vmeKernel->getKernelInfo().getArgNumByName("accelerator"); - srcImgArgNum = vmeKernel->getKernelInfo().getArgNumByName("srcImg"); - refImgArgNum = vmeKernel->getKernelInfo().getArgNumByName("refImg"); - motionVectorBufferArgNum = vmeKernel->getKernelInfo().getArgNumByName("motion_vector_buffer"); - predictionMotionVectorBufferArgNum = vmeKernel->getKernelInfo().getArgNumByName("prediction_motion_vector_buffer"); - residualsArgNum = vmeKernel->getKernelInfo().getArgNumByName("residuals"); + auto rootDeviceIndex = clDevice.getRootDeviceIndex(); + widthArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("width"); + heightArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("height"); + strideArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("stride"); + acceleratorArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("accelerator"); + srcImgArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("srcImg"); + refImgArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("refImg"); + motionVectorBufferArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("motion_vector_buffer"); + predictionMotionVectorBufferArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("prediction_motion_vector_buffer"); + residualsArgNum = vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("residuals"); } void getBlkTraits(const Vec3 &inGws, size_t &gwWidthInBlk, size_t &gwHeightInBlk) const { @@ -51,6 +52,8 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder { return false; } + auto rootDeviceIndex = clDevice.getRootDeviceIndex(); + size_t gwWidthInBlk = 0; size_t gwHeightInBlk = 0; getBlkTraits(inGws, gwWidthInBlk, gwHeightInBlk); @@ -59,7 +62,7 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder { cl_int width = (cl_int)gwWidthInBlk; cl_int stride = height; size_t numThreadsX = gwWidthInBlk; - const size_t simdWidth = 
vmeKernel->getKernelInfo().getMaxSimdSize(); + const size_t simdWidth = vmeKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(); stride = static_cast(Math::divideAndRoundUp(height * width, numThreadsX)); // update implicit args @@ -69,7 +72,7 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder { // Update global work size to force macro-block to HW thread execution model Vec3 gws = {numThreadsX * simdWidth, 1, 1}; - Vec3 lws = {vmeKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0], 1, 1}; + Vec3 lws = {vmeKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0], 1, 1}; DispatchInfoBuilder builder(clDevice); builder.setDispatchGeometry(gws, lws, inOffset, gws, lws); @@ -164,9 +167,10 @@ class VmeBuiltinDispatchInfoBuilder : public BuiltinDispatchInfoBuilder { template RetType getKernelArgByValValue(uint32_t argNum) const { - auto &kai = vmeKernel->getKernelInfo().kernelArgInfo[argNum]; - DEBUG_BREAK_IF(kai.kernelArgPatchInfoVector.size() != 1); - const KernelArgPatchInfo &patchInfo = kai.kernelArgPatchInfoVector[0]; + auto rootDeviceIndex = clDevice.getRootDeviceIndex(); + auto &kernelArgInfo = vmeKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum]; + DEBUG_BREAK_IF(kernelArgInfo.kernelArgPatchInfoVector.size() != 1); + const KernelArgPatchInfo &patchInfo = kernelArgInfo.kernelArgPatchInfoVector[0]; DEBUG_BREAK_IF(sizeof(RetType) > patchInfo.size); return *(RetType *)(vmeKernel->getCrossThreadData(clDevice.getRootDeviceIndex()) + patchInfo.crossthreadOffset); } @@ -255,18 +259,19 @@ class AdvancedVmeBuiltinDispatchInfoBuilder : public VmeBuiltinDispatchInfoBuild const char *kernelName) : VmeBuiltinDispatchInfoBuilder(kernelsLib, device, builtinOp, kernelName) { - flagsArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("flags"); - intraSrcImgArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("intraSrcImg"); - skipBlockTypeArgNum = 
this->vmeKernel->getKernelInfo().getArgNumByName("skip_block_type"); - searchCostPenaltyArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("search_cost_penalty"); - searchCostPrecisionArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("search_cost_precision"); - bidirWeightArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("bidir_weight"); - predictorsBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("predictors_buffer"); - countMotionVectorBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("count_motion_vector_buffer"); - skipMotionVectorBufferArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("skip_motion_vector_buffer"); - intraSearchPredictorModesArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("intra_search_predictor_modes"); - skipResidualsArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("skip_residuals"); - intraResidualsArgNum = this->vmeKernel->getKernelInfo().getArgNumByName("intra_residuals"); + auto rootDeviceIndex = clDevice.getRootDeviceIndex(); + flagsArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("flags"); + intraSrcImgArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("intraSrcImg"); + skipBlockTypeArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("skip_block_type"); + searchCostPenaltyArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("search_cost_penalty"); + searchCostPrecisionArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("search_cost_precision"); + bidirWeightArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("bidir_weight"); + predictorsBufferArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("predictors_buffer"); + countMotionVectorBufferArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("count_motion_vector_buffer"); + skipMotionVectorBufferArgNum = 
this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("skip_motion_vector_buffer"); + intraSearchPredictorModesArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("intra_search_predictor_modes"); + skipResidualsArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("skip_residuals"); + intraResidualsArgNum = this->vmeKernel->getKernelInfo(rootDeviceIndex).getArgNumByName("intra_residuals"); } bool setExplicitArg(uint32_t argIndex, size_t argSize, const void *argVal, cl_int &err) const override { diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index f3270c6b74..a69df40d47 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -528,10 +528,10 @@ void CommandQueue::enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList bool CommandQueue::setupDebugSurface(Kernel *kernel) { auto debugSurface = getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation(); + auto rootDeviceIndex = device->getRootDeviceIndex(); DEBUG_BREAK_IF(!kernel->requiresSshForBuffers()); - - auto surfaceState = ptrOffset(reinterpret_cast(kernel->getSurfaceStateHeap(device->getRootDeviceIndex())), - kernel->getKernelInfo().patchInfo.pAllocateSystemThreadSurface->Offset); + auto surfaceState = ptrOffset(reinterpret_cast(kernel->getSurfaceStateHeap(rootDeviceIndex)), + kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface->Offset); void *addressToPatch = reinterpret_cast(debugSurface->getGpuAddress()); size_t sizeToPatch = debugSurface->getUnderlyingBufferSize(); Buffer::setSurfaceState(&device->getDevice(), surfaceState, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0); @@ -775,7 +775,7 @@ void CommandQueue::aubCaptureHook(bool &blocking, bool &clearAllDependencies, co if (getGpgpuCommandStreamReceiver().getType() > CommandStreamReceiverType::CSR_HW) { for (auto &dispatchInfo : multiDispatchInfo) { - auto 
kernelName = dispatchInfo.getKernel()->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName; + auto kernelName = dispatchInfo.getKernel()->getKernelInfo(device->getRootDeviceIndex()).kernelDescriptor.kernelMetadata.kernelName; getGpgpuCommandStreamReceiver().addAubComment(kernelName.c_str()); } } diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index f228d7ed7c..e2b0d0689b 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -73,14 +73,14 @@ void CommandQueueHw::enqueueHandler(Surface *(&surfaces)[surfaceCount dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux); } } - - if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) { + auto rootDeviceIndex = device->getRootDeviceIndex(); + if (kernel->getKernelInfo(rootDeviceIndex).builtinDispatchBuilder == nullptr) { DispatchInfoBuilder builder(getClDevice()); builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3{0, 0, 0}, localWorkSizesIn); builder.setKernel(kernel); builder.bake(multiDispatchInfo); } else { - auto builder = kernel->getKernelInfo().builtinDispatchBuilder; + auto builder = kernel->getKernelInfo(rootDeviceIndex).builtinDispatchBuilder; builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, enqueuedWorkSizes, globalOffsets); if (multiDispatchInfo.size() == 0) { @@ -674,6 +674,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( auto specialPipelineSelectMode = false; Kernel *kernel = nullptr; bool usePerDssBackedBuffer = false; + auto rootDeviceIndex = device->getRootDeviceIndex(); for (auto &dispatchInfo : multiDispatchInfo) { if (kernel != dispatchInfo.getKernel()) { @@ -684,14 +685,14 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( kernel->makeResident(getGpgpuCommandStreamReceiver()); requiresCoherency |= kernel->requiresCoherency(); mediaSamplerRequired |= kernel->isVmeKernel(); - 
auto numGrfRequiredByKernel = kernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired; + auto numGrfRequiredByKernel = kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->NumGRFRequired; numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel); specialPipelineSelectMode |= kernel->requiresSpecialPipelineSelectMode(); if (kernel->hasUncacheableStatelessArgs()) { anyUncacheableArgs = true; } - if (kernel->requiresPerDssBackedBuffer()) { + if (kernel->requiresPerDssBackedBuffer(rootDeviceIndex)) { usePerDssBackedBuffer = true; } } diff --git a/opencl/source/command_queue/enqueue_kernel.h b/opencl/source/command_queue/enqueue_kernel.h index c9ff880554..596543ef6f 100644 --- a/opencl/source/command_queue/enqueue_kernel.h +++ b/opencl/source/command_queue/enqueue_kernel.h @@ -37,7 +37,8 @@ cl_int CommandQueueHw::enqueueKernel( size_t enqueuedLocalWorkSize[3] = {0, 0, 0}; auto &kernel = *castToObjectOrAbort(clKernel); - const auto &kernelInfo = kernel.getKernelInfo(); + auto rootDeviceIndex = device->getRootDeviceIndex(); + const auto &kernelInfo = kernel.getKernelInfo(rootDeviceIndex); if (kernel.isParentKernel && !this->context->getDefaultDeviceQueue()) { return CL_INVALID_OPERATION; @@ -111,28 +112,28 @@ cl_int CommandQueueHw::enqueueKernel( Surface *surfaces[] = {&s}; if (context->isProvidingPerformanceHints()) { - if (kernel.hasPrintfOutput()) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PRINTF_DETECTED_IN_KERNEL, kernel.getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); + if (kernel.hasPrintfOutput(rootDeviceIndex)) { + context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PRINTF_DETECTED_IN_KERNEL, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str()); } if (kernel.requiresCoherency()) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, KERNEL_REQUIRES_COHERENCY, 
kernel.getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); + context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, KERNEL_REQUIRES_COHERENCY, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str()); } } - if (kernel.getKernelInfo().builtinDispatchBuilder != nullptr) { - cl_int err = kernel.getKernelInfo().builtinDispatchBuilder->validateDispatch(&kernel, workDim, Vec3(region), Vec3(workGroupSize), Vec3(globalWorkOffset)); + if (kernelInfo.builtinDispatchBuilder != nullptr) { + cl_int err = kernelInfo.builtinDispatchBuilder->validateDispatch(&kernel, workDim, Vec3(region), Vec3(workGroupSize), Vec3(globalWorkOffset)); if (err != CL_SUCCESS) return err; } - DBG_LOG(PrintDispatchParameters, "Kernel: ", kernel.getKernelInfo().kernelDescriptor.kernelMetadata.kernelName, + DBG_LOG(PrintDispatchParameters, "Kernel: ", kernelInfo.kernelDescriptor.kernelMetadata.kernelName, ",LWS:, ", localWorkSizeIn ? localWorkSizeIn[0] : 0, ",", localWorkSizeIn ? localWorkSizeIn[1] : 0, ",", localWorkSizeIn ? 
localWorkSizeIn[2] : 0, ",GWS:,", globalWorkSizeIn[0], ",", globalWorkSizeIn[1], ",", globalWorkSizeIn[2], - ",SIMD:, ", kernel.getKernelInfo().getMaxSimdSize()); + ",SIMD:, ", kernelInfo.getMaxSimdSize()); if (totalWorkItems > kernel.maxKernelWorkGroupSize) { return CL_INVALID_WORK_GROUP_SIZE; diff --git a/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl b/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl index bcc8964b30..17def4fa49 100644 --- a/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl +++ b/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl @@ -68,6 +68,9 @@ void GpgpuWalkerHelper::dispatchScheduler( IndirectHeap *dsh, bool isCcsUsed) { + auto rootDeviceIndex = devQueueHw.getDevice().getRootDeviceIndex(); + const auto &kernelInfo = scheduler.getKernelInfo(rootDeviceIndex); + using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; @@ -89,7 +92,7 @@ void GpgpuWalkerHelper::dispatchScheduler( DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0); // Determine SIMD size - uint32_t simd = scheduler.getKernelInfo().getMaxSimdSize(); + uint32_t simd = kernelInfo.getMaxSimdSize(); DEBUG_BREAK_IF(simd != PARALLEL_SCHEDULER_COMPILATION_SIZE_20); // Patch our kernel constants @@ -132,8 +135,8 @@ void GpgpuWalkerHelper::dispatchScheduler( auto pGpGpuWalkerCmd = commandStream.getSpaceForCmd(); GPGPU_WALKER cmdWalker = GfxFamily::cmdInitGpgpuWalker; - bool inlineDataProgrammingRequired = HardwareCommandsHelper::inlineDataProgrammingRequired(scheduler); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(scheduler); + bool inlineDataProgrammingRequired = HardwareCommandsHelper::inlineDataProgrammingRequired(scheduler, rootDeviceIndex); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(scheduler, rootDeviceIndex); HardwareCommandsHelper::sendIndirectState( 
commandStream, @@ -141,7 +144,7 @@ void GpgpuWalkerHelper::dispatchScheduler( *ioh, *ssh, scheduler, - scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, devQueueHw.getDevice().getRootDeviceIndex()), + scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), simd, localWorkSizes, offsetInterfaceDescriptorTable, @@ -159,7 +162,7 @@ void GpgpuWalkerHelper::dispatchScheduler( size_t workGroups[3] = {(scheduler.getGws() / scheduler.getLws()), 1, 1}; GpgpuWalkerHelper::setGpgpuWalkerThreadData(&cmdWalker, globalOffsets, globalOffsets, workGroups, localWorkSizes, simd, 1, true, inlineDataProgrammingRequired, - *scheduler.getKernelInfo().patchInfo.threadPayload, 0u); + *kernelInfo.patchInfo.threadPayload, 0u); *pGpGpuWalkerCmd = cmdWalker; // Implement disabling special WA DisableLSQCROPERFforOCL if needed diff --git a/opencl/source/command_queue/hardware_interface_bdw_plus.inl b/opencl/source/command_queue/hardware_interface_bdw_plus.inl index 58d428bc90..917aaec6da 100644 --- a/opencl/source/command_queue/hardware_interface_bdw_plus.inl +++ b/opencl/source/command_queue/hardware_interface_bdw_plus.inl @@ -70,10 +70,11 @@ inline void HardwareInterface::programWalker( Vec3 &numberOfWorkgroups, Vec3 &startOfWorkgroups) { + auto rootDeviceIndex = commandQueue.getDevice().getRootDeviceIndex(); auto walkerCmdBuf = allocateWalkerSpace(commandStream, kernel); WALKER_TYPE walkerCmd = GfxFamily::cmdInitGpgpuWalker; uint32_t dim = dispatchInfo.getDim(); - uint32_t simd = kernel.getKernelInfo().getMaxSimdSize(); + uint32_t simd = kernel.getKernelInfo(rootDeviceIndex).getMaxSimdSize(); size_t globalOffsets[3] = {dispatchInfo.getOffset().x, dispatchInfo.getOffset().y, dispatchInfo.getOffset().z}; size_t startWorkGroups[3] = {startOfWorkgroups.x, startOfWorkgroups.y, startOfWorkgroups.z}; @@ -85,7 +86,7 @@ inline void HardwareInterface::programWalker( } auto isCcsUsed = 
EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(kernel); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(kernel, rootDeviceIndex); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -93,7 +94,7 @@ inline void HardwareInterface::programWalker( ioh, ssh, kernel, - kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, commandQueue.getDevice().getRootDeviceIndex()), + kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), simd, localWorkSizes, offsetInterfaceDescriptorTable, @@ -107,7 +108,7 @@ inline void HardwareInterface::programWalker( GpgpuWalkerHelper::setGpgpuWalkerThreadData(&walkerCmd, globalOffsets, startWorkGroups, numWorkGroups, localWorkSizes, simd, dim, false, false, - *kernel.getKernelInfo().patchInfo.threadPayload, 0u); + *kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload, 0u); EncodeDispatchKernel::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd); *walkerCmdBuf = walkerCmd; diff --git a/opencl/source/command_queue/local_work_size.cpp b/opencl/source/command_queue/local_work_size.cpp index f422e636b9..3072e048d7 100644 --- a/opencl/source/command_queue/local_work_size.cpp +++ b/opencl/source/command_queue/local_work_size.cpp @@ -416,10 +416,11 @@ Vec3 computeWorkgroupSize(const DispatchInfo &dispatchInfo) { if (kernel != nullptr) { auto &device = dispatchInfo.getClDevice(); + auto rootDeviceIndex = device.getRootDeviceIndex(); const auto &hwInfo = device.getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto isSimulation = device.isSimulation(); - if (kernel->requiresLimitedWorkgroupSize(device.getRootDeviceIndex()) && hwHelper.isSpecialWorkgroupSizeRequired(hwInfo, isSimulation)) { + if (kernel->requiresLimitedWorkgroupSize(rootDeviceIndex) && hwHelper.isSpecialWorkgroupSizeRequired(hwInfo, 
isSimulation)) { setSpecialWorkgroupSize(workGroupSize); } else if (DebugManager.flags.EnableComputeWorkSizeND.get()) { WorkSizeInfo wsInfo(dispatchInfo); @@ -427,7 +428,7 @@ Vec3 computeWorkgroupSize(const DispatchInfo &dispatchInfo) { computeWorkgroupSizeND(wsInfo, workGroupSize, workItems, dispatchInfo.getDim()); } else { auto maxWorkGroupSize = kernel->maxKernelWorkGroupSize; - auto simd = kernel->getKernelInfo().getMaxSimdSize(); + auto simd = kernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(); size_t workItems[3] = {dispatchInfo.getGWS().x, dispatchInfo.getGWS().y, dispatchInfo.getGWS().z}; if (dispatchInfo.getDim() == 1) { computeWorkgroupSize1D(maxWorkGroupSize, workGroupSize, workItems, simd); @@ -475,8 +476,9 @@ void provideLocalWorkGroupSizeHints(Context *context, DispatchInfo dispatchInfo) preferredWorkGroupSize[1] = lws.y; preferredWorkGroupSize[2] = lws.z; + const auto &kernelInfo = dispatchInfo.getKernel()->getKernelInfo(dispatchInfo.getClDevice().getRootDeviceIndex()); if (dispatchInfo.getEnqueuedWorkgroupSize().x == 0) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, NULL_LOCAL_WORKGROUP_SIZE, dispatchInfo.getKernel()->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), + context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, NULL_LOCAL_WORKGROUP_SIZE, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), preferredWorkGroupSize[0], preferredWorkGroupSize[1], preferredWorkGroupSize[2]); } else { size_t localWorkSizesIn[3] = {dispatchInfo.getEnqueuedWorkgroupSize().x, dispatchInfo.getEnqueuedWorkgroupSize().y, dispatchInfo.getEnqueuedWorkgroupSize().z}; @@ -484,7 +486,7 @@ void provideLocalWorkGroupSizeHints(Context *context, DispatchInfo dispatchInfo) if (localWorkSizesIn[i] != preferredWorkGroupSize[i]) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, BAD_LOCAL_WORKGROUP_SIZE, localWorkSizesIn[0], localWorkSizesIn[1], localWorkSizesIn[2], - 
dispatchInfo.getKernel()->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), + kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), preferredWorkGroupSize[0], preferredWorkGroupSize[1], preferredWorkGroupSize[2]); break; } diff --git a/opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl b/opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl index 2734822372..e202a8bf22 100644 --- a/opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl +++ b/opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl @@ -772,7 +772,7 @@ void AUBCommandStreamReceiverHw::dumpAllocation(GraphicsAllocation &g template AubSubCaptureStatus AUBCommandStreamReceiverHw::checkAndActivateAubSubCapture(const MultiDispatchInfo &dispatchInfo) { - std::string kernelName = dispatchInfo.peekMainKernel()->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName; + std::string kernelName = dispatchInfo.peekMainKernel()->getKernelInfo(this->rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName; auto status = subCaptureManager->checkAndActivateSubCapture(kernelName); if (status.isActive) { std::string subCaptureFile = subCaptureManager->getSubCaptureFileName(kernelName); diff --git a/opencl/source/command_stream/tbx_command_stream_receiver_hw.inl b/opencl/source/command_stream/tbx_command_stream_receiver_hw.inl index 5caa2e7a4b..14ea506752 100644 --- a/opencl/source/command_stream/tbx_command_stream_receiver_hw.inl +++ b/opencl/source/command_stream/tbx_command_stream_receiver_hw.inl @@ -553,7 +553,7 @@ AubSubCaptureStatus TbxCommandStreamReceiverHw::checkAndActivateAubSu return {false, false}; } - std::string kernelName = (dispatchInfo.empty() ? "" : dispatchInfo.peekMainKernel()->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName); + std::string kernelName = (dispatchInfo.empty() ? 
"" : dispatchInfo.peekMainKernel()->getKernelInfo(this->rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName); auto status = subCaptureManager->checkAndActivateSubCapture(kernelName); if (status.isActive && !status.wasActiveInPreviousEnqueue) { dumpTbxNonWritable = true; diff --git a/opencl/source/gen8/gpgpu_walker_gen8.cpp b/opencl/source/gen8/gpgpu_walker_gen8.cpp index 358310ae20..ea1358900f 100644 --- a/opencl/source/gen8/gpgpu_walker_gen8.cpp +++ b/opencl/source/gen8/gpgpu_walker_gen8.cpp @@ -15,12 +15,12 @@ namespace NEO { template <> void GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(NEO::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode) { if (disablePerfMode) { - if (kernel.getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { + if (kernel.getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { // Set bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4 GpgpuWalkerHelper::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_OR, L3SQC_BIT_LQSC_RO_PERF_DIS); } } else { - if (kernel.getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { + if (kernel.getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { // Add PIPE_CONTROL with CS_Stall to wait till GPU finishes its work typedef typename BDWFamily::PIPE_CONTROL PIPE_CONTROL; auto pCmd = reinterpret_cast(pCommandStream->getSpace(sizeof(PIPE_CONTROL))); @@ -40,7 +40,7 @@ size_t GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(const K typedef typename BDWFamily::MI_MATH MI_MATH; typedef typename BDWFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE; size_t n = 0; - if (pKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { + if (pKernel->getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { n += sizeof(PIPE_CONTROL) + (2 * sizeof(MI_LOAD_REGISTER_REG) + 
sizeof(MI_LOAD_REGISTER_IMM) + diff --git a/opencl/source/gen9/gpgpu_walker_gen9.cpp b/opencl/source/gen9/gpgpu_walker_gen9.cpp index 9919285972..3c5a63cd57 100644 --- a/opencl/source/gen9/gpgpu_walker_gen9.cpp +++ b/opencl/source/gen9/gpgpu_walker_gen9.cpp @@ -15,12 +15,12 @@ namespace NEO { template <> void GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(NEO::LinearStream *pCommandStream, const Kernel &kernel, bool disablePerfMode) { if (disablePerfMode) { - if (kernel.getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { + if (kernel.getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { // Set bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4 GpgpuWalkerHelper::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_OR, L3SQC_BIT_LQSC_RO_PERF_DIS); } } else { - if (kernel.getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { + if (kernel.getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { // Add PIPE_CONTROL with CS_Stall to wait till GPU finishes its work typedef typename SKLFamily::PIPE_CONTROL PIPE_CONTROL; auto pCmd = reinterpret_cast(pCommandStream->getSpace(sizeof(PIPE_CONTROL))); @@ -40,7 +40,7 @@ size_t GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(const K typedef typename SKLFamily::MI_MATH MI_MATH; typedef typename SKLFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE; size_t n = 0; - if (pKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { + if (pKernel->getDefaultKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { n += sizeof(PIPE_CONTROL) + (2 * sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + diff --git a/opencl/source/gtpin/gtpin_callbacks.cpp b/opencl/source/gtpin/gtpin_callbacks.cpp index 9a185bc9a8..4f44b0279f 100644 --- a/opencl/source/gtpin/gtpin_callbacks.cpp +++ b/opencl/source/gtpin/gtpin_callbacks.cpp @@ -80,7 
+80,7 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) { // Notify GT-Pin that new kernel was created Context *pContext = &(pKernel->getContext()); cl_context context = pContext; - auto &kernelInfo = pKernel->getKernelInfo(); + auto &kernelInfo = pKernel->getKernelInfo(rootDeviceIndex); instrument_params_in_t paramsIn = {}; paramsIn.kernel_type = GTPIN_KERNEL_TYPE_CS; diff --git a/opencl/source/helpers/dispatch_info.cpp b/opencl/source/helpers/dispatch_info.cpp index 894e602742..44df3ab4ce 100644 --- a/opencl/source/helpers/dispatch_info.cpp +++ b/opencl/source/helpers/dispatch_info.cpp @@ -15,7 +15,7 @@ bool DispatchInfo::usesSlm() const { } bool DispatchInfo::usesStatelessPrintfSurface() const { - return (kernel == nullptr) ? false : (kernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface != nullptr); + return (kernel == nullptr) ? false : (kernel->getKernelInfo(pClDevice->getRootDeviceIndex()).patchInfo.pAllocateStatelessPrintfSurface != nullptr); } uint32_t DispatchInfo::getRequiredScratchSize() const { diff --git a/opencl/source/helpers/hardware_commands_helper.h b/opencl/source/helpers/hardware_commands_helper.h index 31d52c68c8..c932761498 100644 --- a/opencl/source/helpers/hardware_commands_helper.h +++ b/opencl/source/helpers/hardware_commands_helper.h @@ -42,7 +42,8 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const Kernel &kernel, const size_t &sizeCrossThreadData, - const size_t &sizePerThreadData); + const size_t &sizePerThreadData, + uint32_t rootDeviceIndex); inline static uint32_t additionalSizeRequiredDsh(); @@ -60,7 +61,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { uint32_t bindingTablePrefetchSize, PreemptionMode preemptionMode, INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor, - const HardwareInfo &hardwareInfo); + const Device &device); static void sendMediaStateFlush( LinearStream &commandStream, @@ -105,7 +106,8 @@ struct 
HardwareCommandsHelper : public PerThreadDataHelper { const size_t localWorkSize[3], Kernel &kernel, size_t &sizePerThreadDataTotal, - size_t &localWorkItems); + size_t &localWorkItems, + uint32_t rootDeviceIndex); static void updatePerThreadDataTotal( size_t &sizePerThreadData, @@ -120,6 +122,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { static size_t getSizeRequiredForCacheFlush(const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress); static size_t getSizeRequiredDSH( + uint32_t rootDeviceIndex, const Kernel &kernel); static size_t getSizeRequiredIOH( uint32_t rootDeviceIndex, @@ -142,7 +145,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { static void programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress); - static bool inlineDataProgrammingRequired(const Kernel &kernel); - static bool kernelUsesLocalIds(const Kernel &kernel); + static bool inlineDataProgrammingRequired(const Kernel &kernel, uint32_t rootDeviceIndex); + static bool kernelUsesLocalIds(const Kernel &kernel, uint32_t rootDeviceIndex); }; } // namespace NEO diff --git a/opencl/source/helpers/hardware_commands_helper_base.inl b/opencl/source/helpers/hardware_commands_helper_base.inl index 7d0f7b1620..106fde7221 100644 --- a/opencl/source/helpers/hardware_commands_helper_base.inl +++ b/opencl/source/helpers/hardware_commands_helper_base.inl @@ -31,10 +31,11 @@ namespace NEO { template size_t HardwareCommandsHelper::getSizeRequiredDSH( + uint32_t rootDeviceIndex, const Kernel &kernel) { using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; - const auto &patchInfo = kernel.getKernelInfo().patchInfo; + const auto &patchInfo = kernel.getKernelInfo(rootDeviceIndex).patchInfo; auto samplerCount = patchInfo.samplerStateArray ? 
patchInfo.samplerStateArray->Count : 0; @@ -51,7 +52,7 @@ size_t HardwareCommandsHelper::getSizeRequiredDSH( totalSize += borderColorSize + additionalSizeRequiredDsh(); - DEBUG_BREAK_IF(!(totalSize >= kernel.getDynamicStateHeapSize() || kernel.getKernelInfo().isVmeWorkload)); + DEBUG_BREAK_IF(!(totalSize >= kernel.getDynamicStateHeapSize() || kernel.getKernelInfo(rootDeviceIndex).isVmeWorkload)); return alignUp(totalSize, EncodeStates::alignInterfaceDescriptorData); } @@ -63,13 +64,13 @@ size_t HardwareCommandsHelper::getSizeRequiredIOH( size_t localWorkSize) { typedef typename GfxFamily::WALKER_TYPE WALKER_TYPE; - auto threadPayload = kernel.getKernelInfo().patchInfo.threadPayload; + auto threadPayload = kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload; DEBUG_BREAK_IF(nullptr == threadPayload); auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload); uint32_t grfSize = sizeof(typename GfxFamily::GRF); return alignUp((kernel.getCrossThreadDataSize(rootDeviceIndex) + - getPerThreadDataSizeTotal(kernel.getKernelInfo().getMaxSimdSize(), grfSize, numChannels, localWorkSize)), + getPerThreadDataSizeTotal(kernel.getKernelInfo(rootDeviceIndex).getMaxSimdSize(), grfSize, numChannels, localWorkSize)), WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); } @@ -97,7 +98,7 @@ size_t getSizeRequired(const MultiDispatchInfo &multiDispatchInfo, SizeGetterT & template size_t HardwareCommandsHelper::getTotalSizeRequiredDSH( const MultiDispatchInfo &multiDispatchInfo) { - return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredDSH(*dispatchInfo.getKernel()); }); + return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredDSH(dispatchInfo.getClDevice().getRootDeviceIndex(), *dispatchInfo.getKernel()); }); } template @@ -159,10 +160,13 @@ size_t HardwareCommandsHelper::sendInterfaceDescriptorData( uint32_t bindingTablePrefetchSize, PreemptionMode preemptionMode, 
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor, - const HardwareInfo &hardwareInfo) { + const Device &device) { using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; using SHARED_LOCAL_MEMORY_SIZE = typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE; + const auto &hardwareInfo = device.getHardwareInfo(); + auto rootDeviceIndex = device.getRootDeviceIndex(); + // Allocate some memory for the interface descriptor auto pInterfaceDescriptor = getInterfaceDescriptor(indirectHeap, offsetInterfaceDescriptor, inlineInterfaceDescriptor); auto interfaceDescriptor = GfxFamily::cmdInitInterfaceDescriptorData; @@ -176,7 +180,7 @@ size_t HardwareCommandsHelper::sendInterfaceDescriptorData( interfaceDescriptor.setDenormMode(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL); - setGrfInfo(&interfaceDescriptor, kernel, sizeCrossThreadData, sizePerThreadData); + setGrfInfo(&interfaceDescriptor, kernel, sizeCrossThreadData, sizePerThreadData, rootDeviceIndex); EncodeDispatchKernel::appendAdditionalIDDFields(&interfaceDescriptor, hardwareInfo, threadsPerThreadGroup, kernel.slmTotalSize); interfaceDescriptor.setBindingTablePointer(static_cast(bindingTablePointer)); @@ -190,7 +194,7 @@ size_t HardwareCommandsHelper::sendInterfaceDescriptorData( interfaceDescriptor.setSharedLocalMemorySize(programmableIDSLMSize); EncodeDispatchKernel::programBarrierEnable(interfaceDescriptor, - kernel.getKernelInfo().patchInfo.executionEnvironment->HasBarriers, + kernel.getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->HasBarriers, hardwareInfo); PreemptionHelper::programInterfaceDescriptorDataPreemption(&interfaceDescriptor, preemptionMode); @@ -220,14 +224,13 @@ size_t HardwareCommandsHelper::sendIndirectState( using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE; - auto &hardwareInfo = device.getHardwareInfo(); auto rootDeviceIndex = device.getRootDeviceIndex(); DEBUG_BREAK_IF(simd != 1 && simd != 8 && simd != 16 && simd != 32); - auto inlineDataProgrammingRequired 
= HardwareCommandsHelper::inlineDataProgrammingRequired(kernel); + auto inlineDataProgrammingRequired = HardwareCommandsHelper::inlineDataProgrammingRequired(kernel, rootDeviceIndex); // Copy the kernel over to the ISH - const auto &kernelInfo = kernel.getKernelInfo(); + const auto &kernelInfo = kernel.getKernelInfo(rootDeviceIndex); const auto &patchInfo = kernelInfo.patchInfo; ssh.align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); @@ -245,7 +248,7 @@ size_t HardwareCommandsHelper::sendIndirectState( samplerStateOffset = EncodeStates::copySamplerState(&dsh, patchInfo.samplerStateArray->Offset, samplerCount, patchInfo.samplerStateArray->BorderColorOffset, kernel.getDynamicStateHeap(), device.getBindlessHeapsHelper()); } - auto threadPayload = kernel.getKernelInfo().patchInfo.threadPayload; + auto threadPayload = kernelInfo.patchInfo.threadPayload; DEBUG_BREAK_IF(nullptr == threadPayload); auto localWorkItems = localWorkSize[0] * localWorkSize[1] * localWorkSize[2]; @@ -270,7 +273,8 @@ size_t HardwareCommandsHelper::sendIndirectState( localWorkSize, kernel, sizePerThreadDataTotal, - localWorkItems); + localWorkItems, + rootDeviceIndex); uint64_t offsetInterfaceDescriptor = offsetInterfaceDescriptorTable + interfaceDescriptorIndex * sizeof(INTERFACE_DESCRIPTOR_DATA); DEBUG_BREAK_IF(patchInfo.executionEnvironment == nullptr); @@ -294,7 +298,7 @@ size_t HardwareCommandsHelper::sendIndirectState( bindingTablePrefetchSize, preemptionMode, inlineInterfaceDescriptor, - hardwareInfo); + device); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { PatchInfoData patchInfoData(kernelStartOffset, 0, PatchInfoAllocationType::InstructionHeap, dsh.getGraphicsAllocation()->getGpuAddress(), offsetInterfaceDescriptor, PatchInfoAllocationType::DynamicStateHeap); @@ -335,22 +339,22 @@ void HardwareCommandsHelper::updatePerThreadDataTotal( } template -bool HardwareCommandsHelper::inlineDataProgrammingRequired(const Kernel &kernel) { +bool 
HardwareCommandsHelper::inlineDataProgrammingRequired(const Kernel &kernel, uint32_t rootDeviceIndex) { auto checkKernelForInlineData = true; if (DebugManager.flags.EnablePassInlineData.get() != -1) { checkKernelForInlineData = !!DebugManager.flags.EnablePassInlineData.get(); } if (checkKernelForInlineData) { - return kernel.getKernelInfo().patchInfo.threadPayload->PassInlineData; + return kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload->PassInlineData; } return false; } template -bool HardwareCommandsHelper::kernelUsesLocalIds(const Kernel &kernel) { - return (kernel.getKernelInfo().patchInfo.threadPayload->LocalIDXPresent || - kernel.getKernelInfo().patchInfo.threadPayload->LocalIDYPresent || - kernel.getKernelInfo().patchInfo.threadPayload->LocalIDZPresent); +bool HardwareCommandsHelper::kernelUsesLocalIds(const Kernel &kernel, uint32_t rootDeviceIndex) { + return (kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload->LocalIDXPresent || + kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload->LocalIDYPresent || + kernel.getKernelInfo(rootDeviceIndex).patchInfo.threadPayload->LocalIDZPresent); } } // namespace NEO diff --git a/opencl/source/helpers/hardware_commands_helper_bdw_plus.inl b/opencl/source/helpers/hardware_commands_helper_bdw_plus.inl index aa2279a148..fbda56410b 100644 --- a/opencl/source/helpers/hardware_commands_helper_bdw_plus.inl +++ b/opencl/source/helpers/hardware_commands_helper_bdw_plus.inl @@ -25,7 +25,8 @@ typename HardwareCommandsHelper::INTERFACE_DESCRIPTOR_DATA *HardwareC template void HardwareCommandsHelper::setGrfInfo(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const Kernel &kernel, - const size_t &sizeCrossThreadData, const size_t &sizePerThreadData) { + const size_t &sizeCrossThreadData, const size_t &sizePerThreadData, + uint32_t rootDeviceIndex) { auto grfSize = sizeof(typename GfxFamily::GRF); DEBUG_BREAK_IF((sizeCrossThreadData % grfSize) != 0); auto numGrfCrossThreadData = 
static_cast(sizeCrossThreadData / grfSize); @@ -101,7 +102,8 @@ void HardwareCommandsHelper::programPerThreadData( const size_t localWorkSize[3], Kernel &kernel, size_t &sizePerThreadDataTotal, - size_t &localWorkItems) { + size_t &localWorkItems, + uint32_t rootDeviceIndex) { uint32_t grfSize = sizeof(typename GfxFamily::GRF); @@ -111,9 +113,9 @@ void HardwareCommandsHelper::programPerThreadData( grfSize, numChannels, std::array{{static_cast(localWorkSize[0]), static_cast(localWorkSize[1]), static_cast(localWorkSize[2])}}, - std::array{{kernel.getKernelInfo().kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0], - kernel.getKernelInfo().kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1], - kernel.getKernelInfo().kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2]}}, + std::array{{kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[0], + kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[1], + kernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.workgroupDimensionsOrder[2]}}, kernel.usesOnlyImages()); updatePerThreadDataTotal(sizePerThreadData, simd, numChannels, sizePerThreadDataTotal, localWorkItems); diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index bc0dadbba4..d96017998b 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -209,6 +209,9 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate commandQueue.getGpgpuCommandStreamReceiver(), bcsCsr); } + auto rootDeviceIndex = commandQueue.getDevice().getRootDeviceIndex(); + const auto &kernelInfo = kernel->getKernelInfo(rootDeviceIndex); + DispatchFlags dispatchFlags( {}, //csrDependencies nullptr, //barrierTimestampPacketNodes @@ -216,7 +219,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate 
commandQueue.flushStamp->getStampReference(), //flushStampReference commandQueue.getThrottle(), //throttle preemptionMode, //preemptionMode - kernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired, //numGrfRequired + kernelInfo.patchInfo.executionEnvironment->NumGRFRequired, //numGrfRequired L3CachingSettings::l3CacheOn, //l3CacheSettings kernel->getThreadArbitrationPolicy(), //threadArbitrationPolicy kernel->getAdditionalKernelExecInfo(), //additionalKernelExecInfo @@ -232,7 +235,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate false, //implicitFlush commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed false, //epilogueRequired - kernel->requiresPerDssBackedBuffer(), //usePerDssBackedBuffer + kernel->requiresPerDssBackedBuffer(rootDeviceIndex), //usePerDssBackedBuffer kernel->isSingleSubdevicePreferred()); if (timestampPacketDependencies) { diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index d8af1ad682..0648cc092a 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -251,7 +251,7 @@ cl_int Kernel::initialize() { localBindingTableOffset = (patchInfo.bindingTableState != nullptr) ? 
patchInfo.bindingTableState->Offset : 0; // patch crossthread data and ssh with inline surfaces, if necessary - auto perHwThreadPrivateMemorySize = PatchTokenBinary::getPerHwThreadPrivateSurfaceSize(patchInfo.pAllocateStatelessPrivateSurface, getKernelInfo().getMaxSimdSize()); + auto perHwThreadPrivateMemorySize = PatchTokenBinary::getPerHwThreadPrivateSurfaceSize(patchInfo.pAllocateStatelessPrivateSurface, kernelInfo.getMaxSimdSize()); if (perHwThreadPrivateMemorySize) { kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize = KernelHelper::getPrivateSurfaceSize(perHwThreadPrivateMemorySize, pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch); @@ -376,7 +376,7 @@ cl_int Kernel::initialize() { program->allocateBlockPrivateSurfaces(*pClDevice); } - if (program->isKernelDebugEnabled() && getKernelInfo().patchInfo.pAllocateSystemThreadSurface) { + if (program->isKernelDebugEnabled() && kernelInfo.patchInfo.pAllocateSystemThreadSurface) { debugEnabled = true; } @@ -643,9 +643,10 @@ cl_int Kernel::getSubGroupInfo(ClDevice &clDevice, cl_kernel_sub_group_info para size_t *paramValueSizeRet) const { size_t numDimensions = 0; size_t WGS = 1; - auto maxSimdSize = static_cast(getKernelInfo().getMaxSimdSize()); - auto maxRequiredWorkGroupSize = static_cast(getKernelInfo().getMaxRequiredWorkGroupSize(maxKernelWorkGroupSize)); - auto largestCompiledSIMDSize = static_cast(getKernelInfo().patchInfo.executionEnvironment->LargestCompiledSIMDSize); + const auto &kernelInfo = getKernelInfo(clDevice.getRootDeviceIndex()); + auto maxSimdSize = static_cast(kernelInfo.getMaxSimdSize()); + auto maxRequiredWorkGroupSize = static_cast(kernelInfo.getMaxRequiredWorkGroupSize(maxKernelWorkGroupSize)); + auto largestCompiledSIMDSize = static_cast(kernelInfo.patchInfo.executionEnvironment->LargestCompiledSIMDSize); GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); @@ -731,10 +732,10 @@ cl_int Kernel::getSubGroupInfo(ClDevice &clDevice, cl_kernel_sub_group_info para return 
changeGetInfoStatusToCLResultType(info.set(Math::divideAndRoundUp(maxRequiredWorkGroupSize, largestCompiledSIMDSize))); } case CL_KERNEL_COMPILE_NUM_SUB_GROUPS: { - return changeGetInfoStatusToCLResultType(info.set(static_cast(getKernelInfo().patchInfo.executionEnvironment->CompiledSubGroupsNumber))); + return changeGetInfoStatusToCLResultType(info.set(static_cast(kernelInfo.patchInfo.executionEnvironment->CompiledSubGroupsNumber))); } case CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL: { - return changeGetInfoStatusToCLResultType(info.set(getKernelInfo().requiredSubGroupSize)); + return changeGetInfoStatusToCLResultType(info.set(kernelInfo.requiredSubGroupSize)); } default: return CL_INVALID_VALUE; @@ -822,8 +823,8 @@ cl_int Kernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) { cl_int retVal = CL_SUCCESS; bool updateExposedKernel = true; auto argWasUncacheable = false; - if (getKernelInfo().builtinDispatchBuilder != nullptr) { - updateExposedKernel = getKernelInfo().builtinDispatchBuilder->setExplicitArg(argIndex, argSize, argVal, retVal); + if (getDefaultKernelInfo().builtinDispatchBuilder != nullptr) { + updateExposedKernel = getDefaultKernelInfo().builtinDispatchBuilder->setExplicitArg(argIndex, argSize, argVal, retVal); } if (updateExposedKernel) { if (argIndex >= kernelArgHandlers.size()) { @@ -1811,8 +1812,8 @@ void Kernel::getParentObjectCounts(ObjectCounts &objectCount) { } } -bool Kernel::hasPrintfOutput() const { - return getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface != nullptr; +bool Kernel::hasPrintfOutput(uint32_t rootDeviceIndex) const { + return getKernelInfo(rootDeviceIndex).patchInfo.pAllocateStatelessPrintfSurface != nullptr; } size_t Kernel::getInstructionHeapSizeForExecutionModel() const { @@ -2245,7 +2246,7 @@ void Kernel::provideInitializationHints() { const auto &patchInfo = kernelInfos[i]->patchInfo; if (patchInfo.mediavfestate) { auto scratchSize = patchInfo.mediavfestate->PerThreadScratchSpace; - scratchSize *= 
getDevice().getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo().getMaxSimdSize(); + scratchSize *= getDevice().getSharedDeviceInfo().computeUnitsUsedForScratch * getKernelInfo(i).getMaxSimdSize(); if (scratchSize > 0) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, REGISTER_PRESSURE_TOO_HIGH, kernelInfos[i]->kernelDescriptor.kernelMetadata.kernelName.c_str(), scratchSize); @@ -2338,12 +2339,12 @@ bool Kernel::isPatched() const { cl_int Kernel::checkCorrectImageAccessQualifier(cl_uint argIndex, size_t argSize, const void *argValue) const { - if (getKernelInfo().kernelArgInfo[argIndex].isImage) { + if (getDefaultKernelInfo().kernelArgInfo[argIndex].isImage) { cl_mem mem = *(static_cast(argValue)); MemObj *pMemObj = nullptr; WithCastToInternal(mem, &pMemObj); if (pMemObj) { - auto accessQualifier = getKernelInfo().kernelArgInfo[argIndex].metadata.accessQualifier; + auto accessQualifier = getDefaultKernelInfo().kernelArgInfo[argIndex].metadata.accessQualifier; cl_mem_flags flags = pMemObj->getFlags(); if ((accessQualifier == KernelArgMetadata::AccessReadOnly && ((flags | CL_MEM_WRITE_ONLY) == flags)) || (accessQualifier == KernelArgMetadata::AccessWriteOnly && ((flags | CL_MEM_READ_ONLY) == flags))) { diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 70887448ce..f9274f4828 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -187,8 +187,8 @@ class Kernel : public BaseObject<_cl_kernel> { return getDefaultKernelInfo().requiresSshForBuffers; } - const KernelInfo &getKernelInfo() const { - return getDefaultKernelInfo(); + const KernelInfo &getKernelInfo(uint32_t rootDeviceIndex) const { + return *kernelInfos[rootDeviceIndex]; } const KernelInfoContainer &getKernelInfos() const { return kernelInfos; @@ -296,7 +296,7 @@ class Kernel : public BaseObject<_cl_kernel> { bool isUsingSharedObjArgs() const { return usingSharedObjArgs; } bool hasUncacheableStatelessArgs() const { 
return statelessUncacheableArgsCount > 0; } - bool hasPrintfOutput() const; + bool hasPrintfOutput(uint32_t rootDeviceIndex) const; void setReflectionSurfaceBlockBtOffset(uint32_t blockID, uint32_t offset); @@ -414,7 +414,7 @@ class Kernel : public BaseObject<_cl_kernel> { const bool isCssUsed, uint32_t rootDeviceIndex) const; - bool requiresPerDssBackedBuffer() const; + bool requiresPerDssBackedBuffer(uint32_t rootDeviceIndex) const; bool requiresLimitedWorkgroupSize(uint32_t rootDeviceIndex) const; bool isKernelDebugEnabled() const { return debugEnabled; } int32_t setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue); @@ -424,9 +424,9 @@ class Kernel : public BaseObject<_cl_kernel> { const ClDeviceVector &getDevices() const { return program->getDevices(); } + const KernelInfo &getDefaultKernelInfo() const; protected: - const KernelInfo &getDefaultKernelInfo() const; struct ObjectCounts { uint32_t imageCount; uint32_t samplerCount; diff --git a/opencl/source/kernel/kernel_extra.cpp b/opencl/source/kernel/kernel_extra.cpp index 9712a23310..6e6b563301 100644 --- a/opencl/source/kernel/kernel_extra.cpp +++ b/opencl/source/kernel/kernel_extra.cpp @@ -29,7 +29,7 @@ int Kernel::setKernelThreadArbitrationPolicy(uint32_t policy) { return CL_SUCCESS; } -bool Kernel::requiresPerDssBackedBuffer() const { +bool Kernel::requiresPerDssBackedBuffer(uint32_t rootDeviceIndex) const { return DebugManager.flags.ForcePerDssBackedBufferProgramming.get(); } diff --git a/opencl/source/program/kernel_info.cpp b/opencl/source/program/kernel_info.cpp index 74c04dda85..9ea5975c84 100644 --- a/opencl/source/program/kernel_info.cpp +++ b/opencl/source/program/kernel_info.cpp @@ -133,11 +133,12 @@ WorkSizeInfo::WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t } WorkSizeInfo::WorkSizeInfo(const DispatchInfo &dispatchInfo) { auto &device = dispatchInfo.getClDevice(); + const auto &kernelInfo = 
dispatchInfo.getKernel()->getKernelInfo(device.getRootDeviceIndex()); this->maxWorkGroupSize = dispatchInfo.getKernel()->maxKernelWorkGroupSize; - auto pExecutionEnvironment = dispatchInfo.getKernel()->getKernelInfo().patchInfo.executionEnvironment; + auto pExecutionEnvironment = kernelInfo.patchInfo.executionEnvironment; this->hasBarriers = (pExecutionEnvironment != nullptr) && (pExecutionEnvironment->HasBarriers); - this->simdSize = (uint32_t)dispatchInfo.getKernel()->getKernelInfo().getMaxSimdSize(); - this->slmTotalSize = (uint32_t)dispatchInfo.getKernel()->slmTotalSize; + this->simdSize = static_cast(kernelInfo.getMaxSimdSize()); + this->slmTotalSize = static_cast(dispatchInfo.getKernel()->slmTotalSize); this->coreFamily = device.getHardwareInfo().platform.eRenderCoreFamily; this->numThreadsPerSubSlice = static_cast(device.getSharedDeviceInfo().maxNumEUsPerSubSlice) * device.getSharedDeviceInfo().numThreadsPerEU; @@ -148,7 +149,7 @@ WorkSizeInfo::WorkSizeInfo(const DispatchInfo &dispatchInfo) { void WorkSizeInfo::setIfUseImg(Kernel *pKernel) { auto ParamsCount = pKernel->getKernelArgsNumber(); for (auto i = 0u; i < ParamsCount; i++) { - if (pKernel->getKernelInfo().kernelArgInfo[i].isImage) { + if (pKernel->getDefaultKernelInfo().kernelArgInfo[i].isImage) { imgUsed = true; yTiledSurfaces = true; } diff --git a/opencl/source/program/printf_handler.cpp b/opencl/source/program/printf_handler.cpp index 44ade48fd1..2e98b619ea 100644 --- a/opencl/source/program/printf_handler.cpp +++ b/opencl/source/program/printf_handler.cpp @@ -58,12 +58,12 @@ void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo) sizeof(printfSurfaceInitialDataSize)); auto printfPatchAddress = ptrOffset(reinterpret_cast(kernel->getCrossThreadData(rootDeviceIndex)), - kernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->DataParamOffset); + kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateStatelessPrintfSurface->DataParamOffset); - 
patchWithRequiredSize(printfPatchAddress, kernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->DataParamSize, (uintptr_t)printfSurface->getGpuAddressToPatch()); + patchWithRequiredSize(printfPatchAddress, kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateStatelessPrintfSurface->DataParamSize, (uintptr_t)printfSurface->getGpuAddressToPatch()); if (kernel->requiresSshForBuffers()) { auto surfaceState = ptrOffset(reinterpret_cast(kernel->getSurfaceStateHeap(rootDeviceIndex)), - kernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->SurfaceStateHeapOffset); + kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateStatelessPrintfSurface->SurfaceStateHeapOffset); void *addressToPatch = printfSurface->getUnderlyingBuffer(); size_t sizeToPatch = printfSurface->getUnderlyingBufferSize(); Buffer::setSurfaceState(&device.getDevice(), surfaceState, sizeToPatch, addressToPatch, 0, printfSurface, 0, 0); @@ -76,7 +76,7 @@ void PrintfHandler::makeResident(CommandStreamReceiver &commandStreamReceiver) { void PrintfHandler::printEnqueueOutput() { PrintFormatter printFormatter(reinterpret_cast(printfSurface->getUnderlyingBuffer()), static_cast(printfSurface->getUnderlyingBufferSize()), - kernel->is32Bit(), kernel->getKernelInfo().patchInfo.stringDataMap); + kernel->is32Bit(), kernel->getKernelInfo(device.getRootDeviceIndex()).patchInfo.stringDataMap); printFormatter.printKernelOutput(); } } // namespace NEO diff --git a/opencl/source/utilities/logger.cpp b/opencl/source/utilities/logger.cpp index 27ac235e77..c404ffbc63 100644 --- a/opencl/source/utilities/logger.cpp +++ b/opencl/source/utilities/logger.cpp @@ -169,8 +169,8 @@ void FileLogger::dumpKernelArgs(const Kernel *kernel) { if (dumpKernelArgsEnabled && kernel != nullptr) { std::unique_lock theLock(mtx); std::ofstream outFile; - - for (unsigned int i = 0; i < kernel->getKernelInfo().kernelArgInfo.size(); i++) { + const auto &kernelInfo = kernel->getDefaultKernelInfo(); + for (unsigned int i 
= 0; i < kernelInfo.kernelArgInfo.size(); i++) { std::string type; std::string fileName; const char *ptr = nullptr; @@ -178,7 +178,7 @@ void FileLogger::dumpKernelArgs(const Kernel *kernel) { uint64_t flags = 0; std::unique_ptr argVal = nullptr; - auto &argInfo = kernel->getKernelInfo().kernelArgInfo[i]; + auto &argInfo = kernelInfo.kernelArgInfo[i]; if (argInfo.metadata.addressQualifier == KernelArgMetadata::AddrLocal) { type = "local"; @@ -221,7 +221,7 @@ void FileLogger::dumpKernelArgs(const Kernel *kernel) { } if (ptr && size) { - fileName = kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName + "_arg_" + std::to_string(i) + "_" + type + "_size_" + std::to_string(size) + "_flags_" + std::to_string(flags) + ".bin"; + fileName = kernelInfo.kernelDescriptor.kernelMetadata.kernelName + "_arg_" + std::to_string(i) + "_" + type + "_size_" + std::to_string(size) + "_flags_" + std::to_string(flags) + ".bin"; writeToFile(fileName, ptr, size, std::ios::trunc | std::ios::binary); } } diff --git a/opencl/test/unit_test/api/cl_create_program_with_built_in_kernels_tests.cpp b/opencl/test/unit_test/api/cl_create_program_with_built_in_kernels_tests.cpp index 1abc62b9e5..952b5e7f7c 100644 --- a/opencl/test/unit_test/api/cl_create_program_with_built_in_kernels_tests.cpp +++ b/opencl/test/unit_test/api/cl_create_program_with_built_in_kernels_tests.cpp @@ -213,11 +213,11 @@ TEST_F(clCreateProgramWithBuiltInVmeKernelsTests, GivenVmeBlockMotionEstimateKer &retVal); auto kernNeo = castToObject(kernel); - EXPECT_NE(nullptr, kernNeo->getKernelInfo().builtinDispatchBuilder); + EXPECT_NE(nullptr, kernNeo->getKernelInfo(testedRootDeviceIndex).builtinDispatchBuilder); EXPECT_EQ(6U, kernNeo->getKernelArgsNumber()); auto &vmeBuilder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockMotionEstimateIntel, *pClDevice); - EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo().builtinDispatchBuilder); + EXPECT_EQ(&vmeBuilder, 
kernNeo->getKernelInfo(testedRootDeviceIndex).builtinDispatchBuilder); clReleaseKernel(kernel); clReleaseProgram(program); @@ -249,11 +249,11 @@ TEST_F(clCreateProgramWithBuiltInVmeKernelsTests, GivenVmeBlockAdvancedMotionEst &retVal); auto kernNeo = castToObject(kernel); - EXPECT_NE(nullptr, kernNeo->getKernelInfo().builtinDispatchBuilder); + EXPECT_NE(nullptr, kernNeo->getKernelInfo(testedRootDeviceIndex).builtinDispatchBuilder); EXPECT_EQ(15U, kernNeo->getKernelArgsNumber()); auto &vmeBuilder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, *pClDevice); - EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo().builtinDispatchBuilder); + EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo(testedRootDeviceIndex).builtinDispatchBuilder); clReleaseKernel(kernel); clReleaseProgram(program); @@ -285,12 +285,12 @@ TEST_F(clCreateProgramWithBuiltInVmeKernelsTests, GivenVmeBlockAdvancedMotionEst &retVal); auto kernNeo = castToObject(kernel); - EXPECT_NE(nullptr, kernNeo->getKernelInfo().builtinDispatchBuilder); + EXPECT_NE(nullptr, kernNeo->getKernelInfo(testedRootDeviceIndex).builtinDispatchBuilder); EXPECT_EQ(20U, kernNeo->getKernelArgsNumber()); auto ctxNeo = castToObject(pContext); auto &vmeBuilder = Vme::getBuiltinDispatchInfoBuilder(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, *ctxNeo->getDevice(0)); - EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo().builtinDispatchBuilder); + EXPECT_EQ(&vmeBuilder, kernNeo->getKernelInfo(testedRootDeviceIndex).builtinDispatchBuilder); clReleaseKernel(kernel); clReleaseProgram(program); diff --git a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl index b8f0b9b5ea..34171356f4 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl @@ -14,7 +14,7 @@ struct KernelSubGroupInfoKhrFixture : 
HelloWorldFixture(pKernel->getKernelInfo().getMaxSimdSize()); + MaxSimdSize = static_cast(pKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize()); ASSERT_GE(MaxSimdSize, 8u); MaxWorkDim = static_cast(pClDevice->getDeviceInfo().maxWorkItemDimensions); ASSERT_EQ(MaxWorkDim, 3u); @@ -141,11 +141,11 @@ TEST_F(KernelSubGroupInfoKhrReturnCompileSizeTest, GivenKernelWhenGettingRequire EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); size_t requiredSubGroupSize = 0; - auto start = pKernel->getKernelInfo().attributes.find("intel_reqd_sub_group_size("); + auto start = pKernel->getKernelInfo(rootDeviceIndex).attributes.find("intel_reqd_sub_group_size("); if (start != std::string::npos) { start += strlen("intel_reqd_sub_group_size("); - auto stop = pKernel->getKernelInfo().attributes.find(")", start); - requiredSubGroupSize = stoi(pKernel->getKernelInfo().attributes.substr(start, stop - start)); + auto stop = pKernel->getKernelInfo(rootDeviceIndex).attributes.find(")", start); + requiredSubGroupSize = stoi(pKernel->getKernelInfo(rootDeviceIndex).attributes.substr(start, stop - start)); } EXPECT_EQ(paramValue, requiredSubGroupSize); diff --git a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl index 5050ef7ffe..c63d96bb57 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl @@ -16,18 +16,18 @@ struct KernelSubGroupInfoFixture : HelloWorldFixture { void SetUp() override { ParentClass::SetUp(); pKernel->maxKernelWorkGroupSize = static_cast(pDevice->getDeviceInfo().maxWorkGroupSize / 2); - maxSimdSize = static_cast(pKernel->getKernelInfo().getMaxSimdSize()); + maxSimdSize = static_cast(pKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize()); ASSERT_LE(8u, maxSimdSize); maxWorkDim = static_cast(pClDevice->getDeviceInfo().maxWorkItemDimensions); ASSERT_EQ(3u, maxWorkDim); maxWorkGroupSize = 
static_cast(pKernel->maxKernelWorkGroupSize); ASSERT_GE(1024u, maxWorkGroupSize); - largestCompiledSIMDSize = static_cast(pKernel->getKernelInfo().patchInfo.executionEnvironment->LargestCompiledSIMDSize); + largestCompiledSIMDSize = static_cast(pKernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->LargestCompiledSIMDSize); ASSERT_EQ(32u, largestCompiledSIMDSize); - auto requiredWorkGroupSizeX = static_cast(pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]); - auto requiredWorkGroupSizeY = static_cast(pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]); - auto requiredWorkGroupSizeZ = static_cast(pKernel->getKernelInfo().kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]); + auto requiredWorkGroupSizeX = static_cast(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]); + auto requiredWorkGroupSizeY = static_cast(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1]); + auto requiredWorkGroupSizeZ = static_cast(pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2]); calculatedMaxWorkgroupSize = requiredWorkGroupSizeX * requiredWorkGroupSizeY * requiredWorkGroupSizeZ; if ((calculatedMaxWorkgroupSize == 0) || (calculatedMaxWorkgroupSize > static_cast(pKernel->maxKernelWorkGroupSize))) { @@ -263,7 +263,7 @@ TEST_F(KernelSubGroupInfoReturnCompileNumberTest, GivenKernelWhenGettingCompileN EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); - EXPECT_EQ(paramValue[0], static_cast(pKernel->getKernelInfo().patchInfo.executionEnvironment->CompiledSubGroupsNumber)); + EXPECT_EQ(paramValue[0], static_cast(pKernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledSubGroupsNumber)); } typedef KernelSubGroupInfoParamFixture KernelSubGroupInfoReturnCompileSizeTest; @@ -286,11 +286,11 @@ 
TEST_F(KernelSubGroupInfoReturnCompileSizeTest, GivenKernelWhenGettingCompileSub EXPECT_EQ(paramValueSizeRet, sizeof(size_t)); size_t requiredSubGroupSize = 0; - auto start = pKernel->getKernelInfo().attributes.find("intel_reqd_sub_group_size("); + auto start = pKernel->getKernelInfo(rootDeviceIndex).attributes.find("intel_reqd_sub_group_size("); if (start != std::string::npos) { start += strlen("intel_reqd_sub_group_size("); - auto stop = pKernel->getKernelInfo().attributes.find(")", start); - requiredSubGroupSize = stoi(pKernel->getKernelInfo().attributes.substr(start, stop - start)); + auto stop = pKernel->getKernelInfo(rootDeviceIndex).attributes.find(")", start); + requiredSubGroupSize = stoi(pKernel->getKernelInfo(rootDeviceIndex).attributes.substr(start, stop - start)); } EXPECT_EQ(paramValue[0], requiredSubGroupSize); diff --git a/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl index eea6181894..3be843e6a8 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl @@ -134,7 +134,7 @@ TEST_F(clGetKernelWorkGroupInfoTest, GivenNullDeviceWhenGettingWorkGroupInfoFrom size_t paramValueSizeRet; MockUnrestrictiveContext context; auto mockProgram = std::make_unique(&context, false, context.getDevices()); - auto mockKernel = std::make_unique(mockProgram.get(), MockKernel::toKernelInfoContainer(pKernel->getKernelInfo(), context.getDevice(0)->getRootDeviceIndex())); + auto mockKernel = std::make_unique(mockProgram.get(), MockKernel::toKernelInfoContainer(pKernel->getKernelInfo(testedRootDeviceIndex), context.getDevice(0)->getRootDeviceIndex())); retVal = clGetKernelWorkGroupInfo( mockKernel.get(), diff --git a/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp b/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp index 7cd82259fc..78e7761949 100644 --- 
a/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp +++ b/opencl/test/unit_test/api/cl_mem_locally_uncached_resource_tests.cpp @@ -30,7 +30,7 @@ uint32_t argMocs(Kernel &kernel, size_t argIndex) { auto rootDeviceIndex = kernel.getDevices()[0]->getRootDeviceIndex(); using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; auto surfaceStateHeapAddress = kernel.getSurfaceStateHeap(rootDeviceIndex); - auto surfaceStateHeapAddressOffset = kernel.getKernelInfo().kernelArgInfo[argIndex].offsetHeap; + auto surfaceStateHeapAddressOffset = kernel.getKernelInfo(rootDeviceIndex).kernelArgInfo[argIndex].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(surfaceStateHeapAddress, surfaceStateHeapAddressOffset)); return surfaceState->getMemoryObjectControlState(); } diff --git a/opencl/test/unit_test/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp b/opencl/test/unit_test/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp index 57618d6965..a2e4f9ec07 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp @@ -114,7 +114,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, AUBHelloWorld, simple) { auto pSBA = reinterpret_cast(cmdStateBaseAddress); ASSERT_NE(nullptr, pSBA); - auto pISA = pKernel->getKernelInfo().getGraphicsAllocation()->getUnderlyingBuffer(); + auto pISA = pKernel->getKernelInfo(rootDeviceIndex).getGraphicsAllocation()->getUnderlyingBuffer(); EXPECT_EQ(0, memcmp(pISA, pExpectedISA, expectedSize)); } @@ -273,7 +273,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, AUBSimpleArg, simple) { auto pSBA = reinterpret_cast(cmdStateBaseAddress); ASSERT_NE(nullptr, pSBA); - auto pISA = pKernel->getKernelInfo().getGraphicsAllocation()->getUnderlyingBuffer(); + auto pISA = pKernel->getKernelInfo(rootDeviceIndex).getGraphicsAllocation()->getUnderlyingBuffer(); EXPECT_EQ(0, memcmp(pISA, pExpectedISA, expectedSize)); } @@ -502,8 +502,8 @@ 
HWTEST_F(AUBSimpleKernelStatelessTest, givenSimpleKernelWhenStatelessPathIsUsedT ASSERT_EQ(CL_SUCCESS, retVal); - EXPECT_FALSE(this->kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); - EXPECT_TRUE(this->kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); + EXPECT_FALSE(this->kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); + EXPECT_TRUE(this->kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); this->pCmdQ->flush(); expectMemory(reinterpret_cast(pBuffer->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress()), @@ -937,7 +937,7 @@ HWTEST2_F(AUBBindlessKernel, givenBindlessCopyKernelWhenEnqueuedThenResultsValid ASSERT_EQ(CL_SUCCESS, retVal); - EXPECT_TRUE(this->kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); + EXPECT_TRUE(this->kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); this->pCmdQ->finish(); expectMemory(reinterpret_cast(pBufferDst->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress()), diff --git a/opencl/test/unit_test/built_ins/built_in_tests.cpp b/opencl/test/unit_test/built_ins/built_in_tests.cpp index b9768faf58..ea3877944c 100644 --- a/opencl/test/unit_test/built_ins/built_in_tests.cpp +++ b/opencl/test/unit_test/built_ins/built_in_tests.cpp @@ -416,12 +416,12 @@ TEST_F(BuiltInTests, givenkAuxBuiltInWhenResizeIsCalledThenCloneAllNewInstancesF EXPECT_EQ(newSize, mockAuxBuiltInOp.convertToAuxKernel.size()); for (auto &convertToAuxKernel : mockAuxBuiltInOp.convertToAuxKernel) { - EXPECT_EQ(&mockAuxBuiltInOp.baseKernel->getKernelInfo(), &convertToAuxKernel->getKernelInfo()); + EXPECT_EQ(&mockAuxBuiltInOp.baseKernel->getKernelInfo(rootDeviceIndex), &convertToAuxKernel->getKernelInfo(rootDeviceIndex)); } EXPECT_EQ(newSize, mockAuxBuiltInOp.convertToNonAuxKernel.size()); for (auto &convertToNonAuxKernel : 
mockAuxBuiltInOp.convertToNonAuxKernel) { - EXPECT_EQ(&mockAuxBuiltInOp.baseKernel->getKernelInfo(), &convertToNonAuxKernel->getKernelInfo()); + EXPECT_EQ(&mockAuxBuiltInOp.baseKernel->getKernelInfo(rootDeviceIndex), &convertToNonAuxKernel->getKernelInfo(rootDeviceIndex)); } } @@ -488,12 +488,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, givenAuxTranslationKernelWhenSettingKe auto argNum = 0; auto expectedMocs = pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); - auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; + auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); - sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; + sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum].offsetHeap; surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); } @@ -503,12 +503,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, givenAuxTranslationKernelWhenSettingKe auto argNum = 1; auto expectedMocs = pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); - auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; + auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum].offsetHeap; auto surfaceState = 
reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); - sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; + sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum].offsetHeap; surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); } @@ -545,7 +545,7 @@ HWTEST_F(BuiltInTests, givenAuxToNonAuxTranslationWhenSettingSurfaceStateThenSet // read arg auto argNum = 0; auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); - auto sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; + auto sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E, surfaceState->getAuxiliarySurfaceMode()); } @@ -554,7 +554,7 @@ HWTEST_F(BuiltInTests, givenAuxToNonAuxTranslationWhenSettingSurfaceStateThenSet // write arg auto argNum = 1; auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); - auto sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; + auto sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState->getAuxiliarySurfaceMode()); } @@ -590,7 +590,7 @@ HWTEST_F(BuiltInTests, givenNonAuxToAuxTranslationWhenSettingSurfaceStateThenSet // read arg auto argNum = 0; 
auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); - auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; + auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState->getAuxiliarySurfaceMode()); } @@ -599,7 +599,7 @@ HWTEST_F(BuiltInTests, givenNonAuxToAuxTranslationWhenSettingSurfaceStateThenSet // write arg auto argNum = 1; auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex); - auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap; + auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo(rootDeviceIndex).kernelArgInfo[argNum].offsetHeap; auto surfaceState = reinterpret_cast(ptrOffset(sshBase, sshOffset)); EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E, surfaceState->getAuxiliarySurfaceMode()); } @@ -763,8 +763,8 @@ HWTEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyBufferToImageStateles auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); - EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); - EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); + EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); + EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); } HWTEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyImageToBufferStatelessIsUsedThenParamsAreCorrect) { @@ -797,8 +797,8 @@ HWTEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyImageToBufferStateles auto kernel = 
multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); - EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); - EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); + EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); + EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); } TEST_F(BuiltInTests, GivenUnalignedCopyBufferToBufferWhenDispatchInfoIsCreatedThenParamsAreCorrect) { @@ -821,7 +821,7 @@ TEST_F(BuiltInTests, GivenUnalignedCopyBufferToBufferWhenDispatchInfoIsCreatedTh const DispatchInfo *dispatchInfo = multiDispatchInfo.begin(); - EXPECT_EQ(dispatchInfo->getKernel()->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName, "CopyBufferToBufferLeftLeftover"); + EXPECT_EQ(dispatchInfo->getKernel()->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName, "CopyBufferToBufferLeftLeftover"); EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), builtinOpsParams)); } @@ -917,7 +917,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, WhenGettingSchedulerKernelThenCorrectK REQUIRE_OCL_21_OR_SKIP(defaultHwInfo); SchedulerKernel &schedulerKernel = pContext->getSchedulerKernel(); std::string name = SchedulerKernel::schedulerName; - EXPECT_EQ(name, schedulerKernel.getKernelInfo().kernelDescriptor.kernelMetadata.kernelName); + EXPECT_EQ(name, schedulerKernel.getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName); } HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, WhenGetttingSchedulerKernelForSecondTimeThenReuseKernel) { @@ -1042,10 +1042,10 @@ TEST_F(VmeBuiltInTests, GivenVmeBuilderWhenGettingDispatchInfoThenParamsAreCorre constexpr uint32_t vmeImplicitArgsBase = 6; constexpr uint32_t vmeImplicitArgs = 3; - ASSERT_EQ(vmeImplicitArgsBase + vmeImplicitArgs, outDi->getKernel()->getKernelInfo().kernelArgInfo.size()); + 
ASSERT_EQ(vmeImplicitArgsBase + vmeImplicitArgs, outDi->getKernel()->getKernelInfo(rootDeviceIndex).kernelArgInfo.size()); uint32_t vmeExtraArgsExpectedVals[] = {18, 22, 18}; // height, width, stride for (uint32_t i = 0; i < vmeImplicitArgs; ++i) { - auto &argInfo = outDi->getKernel()->getKernelInfo().kernelArgInfo[vmeImplicitArgsBase + i]; + auto &argInfo = outDi->getKernel()->getKernelInfo(rootDeviceIndex).kernelArgInfo[vmeImplicitArgsBase + i]; ASSERT_EQ(1U, argInfo.kernelArgPatchInfoVector.size()); auto off = argInfo.kernelArgPatchInfoVector[0].crossthreadOffset; EXPECT_EQ(vmeExtraArgsExpectedVals[i], *((uint32_t *)(outDi->getKernel()->getCrossThreadData(rootDeviceIndex) + off))); @@ -1100,15 +1100,15 @@ TEST_F(VmeBuiltInTests, GivenAdvancedVmeBuilderWhenGettingDispatchInfoThenParams EXPECT_EQ(srcImageArg, outDi->getKernel()->getKernelArg(srcImageArgNum)); - uint32_t vmeImplicitArgsBase = outDi->getKernel()->getKernelInfo().getArgNumByName("intraSrcImg"); + uint32_t vmeImplicitArgsBase = outDi->getKernel()->getKernelInfo(rootDeviceIndex).getArgNumByName("intraSrcImg"); uint32_t vmeImplicitArgs = 4; - ASSERT_EQ(vmeImplicitArgsBase + vmeImplicitArgs, outDi->getKernel()->getKernelInfo().kernelArgInfo.size()); + ASSERT_EQ(vmeImplicitArgsBase + vmeImplicitArgs, outDi->getKernel()->getKernelInfo(rootDeviceIndex).kernelArgInfo.size()); EXPECT_EQ(srcImageArg, outDi->getKernel()->getKernelArg(vmeImplicitArgsBase)); ++vmeImplicitArgsBase; --vmeImplicitArgs; uint32_t vmeExtraArgsExpectedVals[] = {18, 22, 18}; // height, width, stride for (uint32_t i = 0; i < vmeImplicitArgs; ++i) { - auto &argInfo = outDi->getKernel()->getKernelInfo().kernelArgInfo[vmeImplicitArgsBase + i]; + auto &argInfo = outDi->getKernel()->getKernelInfo(rootDeviceIndex).kernelArgInfo[vmeImplicitArgsBase + i]; ASSERT_EQ(1U, argInfo.kernelArgPatchInfoVector.size()); auto off = argInfo.kernelArgPatchInfoVector[0].crossthreadOffset; EXPECT_EQ(vmeExtraArgsExpectedVals[i], *((uint32_t 
*)(outDi->getKernel()->getCrossThreadData(rootDeviceIndex) + off))); diff --git a/opencl/test/unit_test/command_queue/command_enqueue_fixture.h b/opencl/test/unit_test/command_queue/command_enqueue_fixture.h index a8cc0871de..0885e76974 100644 --- a/opencl/test/unit_test/command_queue/command_enqueue_fixture.h +++ b/opencl/test/unit_test/command_queue/command_enqueue_fixture.h @@ -100,8 +100,9 @@ struct CommandQueueStateless : public CommandQueueHw { void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override { auto kernel = dispatchInfo.begin()->getKernel(); - EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); - EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); + auto rootDeviceIndex = this->device->getRootDeviceIndex(); + EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); + EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); } }; @@ -112,14 +113,15 @@ struct CommandQueueStateful : public CommandQueueHw { void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override { auto kernel = dispatchInfo.begin()->getKernel(); auto &device = dispatchInfo.begin()->getClDevice(); + auto rootDeviceIndex = device.getRootDeviceIndex(); if (!device.areSharedSystemAllocationsAllowed()) { - EXPECT_FALSE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); + EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); if (device.getHardwareCapabilities().isStatelesToStatefullWithOffsetSupported) { EXPECT_TRUE(kernel->allBufferArgsStateful); } } else { - EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); - 
EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); + EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); + EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); } } }; diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp index d1ddbe9d17..204bf9ad31 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp @@ -731,7 +731,7 @@ HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenRequiredH Vec3 localWorkgroupSize(workGroupSize); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(kernel); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, kernel, Math::computeTotalElementsCount(localWorkgroupSize)); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(kernel, rootDeviceIndex); diff --git a/opencl/test/unit_test/command_queue/enqueue_copy_buffer_rect_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_copy_buffer_rect_tests.cpp index 7b9cc7d229..1963ee4563 100644 --- a/opencl/test/unit_test/command_queue/enqueue_copy_buffer_rect_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_copy_buffer_rect_tests.cpp @@ -211,8 +211,8 @@ HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRectStatelessThenStatelessK auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); - EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); - EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); + 
EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); + EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); } HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenL3ProgrammingIsCorrect) { diff --git a/opencl/test/unit_test/command_queue/enqueue_copy_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_copy_buffer_tests.cpp index eb3389f9ac..396791d100 100644 --- a/opencl/test/unit_test/command_queue/enqueue_copy_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_copy_buffer_tests.cpp @@ -230,8 +230,8 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferStatelessThenStatelessKernelIsU EXPECT_NE(0u, multiDispatchInfo.size()); auto kernel = multiDispatchInfo.begin()->getKernel(); - EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); - EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); + EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); + EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); } HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenL3ProgrammingIsCorrect) { diff --git a/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp index ce1d76488e..fdbdd43860 100644 --- a/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp @@ -159,7 +159,7 @@ HWTEST_F(EnqueueDebugKernelSimpleTest, givenKernelFromProgramWithDebugEnabledWhe std::unique_ptr> mockCmdQ(new GMockCommandQueueHw(context, pClDevice, 0)); mockCmdQ->getGpgpuCommandStreamReceiver().allocateDebugSurface(SipKernel::maxDbgSurfaceSize); - EXPECT_NE(nullptr, 
kernel->getKernelInfo().patchInfo.pAllocateSystemThreadSurface); + EXPECT_NE(nullptr, kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface); EXPECT_CALL(*mockCmdQ.get(), setupDebugSurface(kernel.get())).Times(1).RetiresOnSaturation(); @@ -175,7 +175,7 @@ HWTEST_F(EnqueueDebugKernelSimpleTest, givenKernelWithoutSystemThreadSurfaceWhen std::unique_ptr kernel(MockKernel::create(*pDevice, &program)); kernel->initialize(); - EXPECT_EQ(nullptr, kernel->getKernelInfo().patchInfo.pAllocateSystemThreadSurface); + EXPECT_EQ(nullptr, kernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateSystemThreadSurface); std::unique_ptr> mockCmdQ(new GMockCommandQueueHw(context, pClDevice, 0)); diff --git a/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp index 955c8cab28..d66b76f030 100644 --- a/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp @@ -146,7 +146,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, FillBufferRightLeftover) { EXPECT_EQ(1u, mdi.size()); auto kernel = mdi.begin()->getKernel(); - EXPECT_STREQ("FillBufferRightLeftover", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); + EXPECT_STREQ("FillBufferRightLeftover", kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str()); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } @@ -173,7 +173,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, FillBufferMiddle) { EXPECT_EQ(1u, mdi.size()); auto kernel = mdi.begin()->getKernel(); - EXPECT_STREQ("FillBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); + EXPECT_STREQ("FillBufferMiddle", kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str()); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } @@ -200,7 +200,7 @@ 
HWTEST_F(EnqueueFillBufferCmdTests, FillBufferLeftLeftover) { EXPECT_EQ(1u, mdi.size()); auto kernel = mdi.begin()->getKernel(); - EXPECT_STREQ("FillBufferLeftLeftover", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); + EXPECT_STREQ("FillBufferLeftLeftover", kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str()); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } @@ -363,8 +363,8 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferStatelessThenStatelessKerne auto kernel = multiDispatchInfo.begin()->getKernel(); ASSERT_NE(nullptr, kernel); - EXPECT_TRUE(kernel->getKernelInfo().patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); - EXPECT_FALSE(kernel->getKernelInfo().kernelArgInfo[0].pureStatefulBufferAccess); + EXPECT_TRUE(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->CompiledForGreaterThan4GBBuffers); + EXPECT_FALSE(kernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].pureStatefulBufferAccess); context.getMemoryManager()->freeGraphicsMemory(patternAllocation); } diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index cf6356c4ed..1827b206c4 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -417,7 +417,7 @@ HWTEST_F(EnqueueKernelTest, addsIndirectData) { callOneWorkItemNDRKernel(); EXPECT_TRUE(UnitTestHelper::evaluateDshUsage(dshBefore, pDSH->getUsed(), pKernel)); EXPECT_NE(iohBefore, pIOH->getUsed()); - if (pKernel->requiresSshForBuffers() || (pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0)) { + if (pKernel->requiresSshForBuffers() || (pKernel->getKernelInfo(rootDeviceIndex).patchInfo.imageMemObjKernelArgs.size() > 0)) { EXPECT_NE(sshBefore, pSSH->getUsed()); } } diff --git 
a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 6cc4f4119e..927ba7ddfe 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -909,13 +909,13 @@ HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationWhenEnqueue // before kernel EXPECT_EQ(1u, std::get(cmdQ.dispatchAuxTranslationInputs.at(0))); // aux before NDR auto kernelBefore = std::get(cmdQ.dispatchAuxTranslationInputs.at(0)); - EXPECT_EQ("fullCopy", kernelBefore->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName); + EXPECT_EQ("fullCopy", kernelBefore->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName); EXPECT_TRUE(kernelBefore->isBuiltIn); // after kernel EXPECT_EQ(3u, std::get(cmdQ.dispatchAuxTranslationInputs.at(1))); // aux + NDR + aux auto kernelAfter = std::get(cmdQ.dispatchAuxTranslationInputs.at(1)); - EXPECT_EQ("fullCopy", kernelAfter->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName); + EXPECT_EQ("fullCopy", kernelAfter->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName); EXPECT_TRUE(kernelAfter->isBuiltIn); } diff --git a/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp index a981f18199..5558b7c60d 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp @@ -569,26 +569,28 @@ HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtr parseCommands(*cmdQ); + auto &kernelInfo = kernel->getKernelInfo(device->getRootDeviceIndex()); + if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { const auto &surfaceStateDst = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 1); - if 
(kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { + if (kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData(device->getRootDeviceIndex()) + - kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); + kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(reinterpret_cast(alignDown(misalignedDstPtr, 4)), *pKernelArg); EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress()); - } else if (kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { + } else if (kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData(device->getRootDeviceIndex()) + - kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); + kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(reinterpret_cast(alignDown(misalignedDstPtr, 4)), static_cast(*pKernelArg)); EXPECT_EQ(static_cast(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress()); } } - if (kernel->getKernelInfo().kernelArgInfo[3].kernelArgPatchInfoVector[0].size == 4 * sizeof(uint32_t)) { // size of uint4 DstOrigin + if (kernelInfo.kernelArgInfo[3].kernelArgPatchInfoVector[0].size == 4 * sizeof(uint32_t)) { // size of uint4 DstOrigin auto dstOffset = (uint32_t *)(kernel->getCrossThreadData(device->getRootDeviceIndex()) + - kernel->getKernelInfo().kernelArgInfo[3].kernelArgPatchInfoVector[0].crossthreadOffset); + kernelInfo.kernelArgInfo[3].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(hostOffset.x + ptrDiff(misalignedDstPtr, alignDown(misalignedDstPtr, 4)), *dstOffset); } else { // DstOrigin arg should be 16 bytes in size, if that changes, above if path should be modified diff --git 
a/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp index 39171b20bb..361d7b5969 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_mem_copy_tests.cpp @@ -138,7 +138,7 @@ HWTEST_F(EnqueueSvmMemCopyTest, givenEnqueueSVMMemcpyWhenUsingCopyBufferToBuffer EXPECT_EQ(Vec3(256 / middleElSize, 1, 1), di->getGWS()); auto kernel = mdi->begin()->getKernel(); - EXPECT_EQ("CopyBufferToBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName); + EXPECT_EQ("CopyBufferToBufferMiddle", kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName); } HWTEST_F(EnqueueSvmMemCopyTest, givenEnqueueSVMMemcpyWhenUsingCopyBufferToBufferBuilderAndSrcHostPtrThenItConfiguredWithBuiltinOpsAndProducesDispatchInfo) { diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp index ac292ab1a1..2ef0932915 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_mem_fill_tests.cpp @@ -139,7 +139,7 @@ HWTEST_P(EnqueueSvmMemFillTest, givenEnqueueSVMMemFillWhenUsingFillBufferBuilder EXPECT_EQ(Vec3(256 / middleElSize, 1, 1), di->getGWS()); auto kernel = di->getKernel(); - EXPECT_STREQ("FillBufferMiddle", kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); + EXPECT_STREQ("FillBufferMiddle", kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str()); } INSTANTIATE_TEST_CASE_P(size_t, diff --git a/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp index f7c2a9dbf0..64bc0b0adc 100644 --- a/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp +++ 
b/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp @@ -567,27 +567,28 @@ HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtr cmdQ->finish(); parseCommands(*cmdQ); + auto &kernelInfo = kernel->getKernelInfo(device->getRootDeviceIndex()); if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { const auto &surfaceState = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 0); - if (kernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { + if (kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData(device->getRootDeviceIndex()) + - kernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(reinterpret_cast(alignDown(misalignedHostPtr, 4)), *pKernelArg); EXPECT_EQ(*pKernelArg, surfaceState.getSurfaceBaseAddress()); - } else if (kernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { + } else if (kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData(device->getRootDeviceIndex()) + - kernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(reinterpret_cast(alignDown(misalignedHostPtr, 4)), static_cast(*pKernelArg)); EXPECT_EQ(static_cast(*pKernelArg), surfaceState.getSurfaceBaseAddress()); } } - if (kernel->getKernelInfo().kernelArgInfo[2].kernelArgPatchInfoVector[0].size == 4 * sizeof(uint32_t)) { // size of uint4 SrcOrigin + if (kernelInfo.kernelArgInfo[2].kernelArgPatchInfoVector[0].size == 4 * sizeof(uint32_t)) { // size of uint4 SrcOrigin auto dstOffset = (uint32_t 
*)(kernel->getCrossThreadData(device->getRootDeviceIndex()) + - kernel->getKernelInfo().kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset); + kernelInfo.kernelArgInfo[2].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(hostOffset.x + ptrDiff(misalignedHostPtr, alignDown(misalignedHostPtr, 4)), *dstOffset); } else { // SrcOrigin arg should be 16 bytes in size, if that changes, above if path should be modified diff --git a/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp b/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp index e79b6b52f1..e18212b65a 100644 --- a/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp @@ -437,7 +437,7 @@ HWTEST_F(GetSizeRequiredBufferTest, GivenHelloWorldKernelWhenEnqueingKernelThenH auto sshAfter = pSSH->getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, KernelFixture::pKernel); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*KernelFixture::pKernel); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *KernelFixture::pKernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *KernelFixture::pKernel, workSize[0]); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*KernelFixture::pKernel, rootDeviceIndex); @@ -476,7 +476,7 @@ HWTEST_F(GetSizeRequiredBufferTest, GivenKernelWithSimpleArgWhenEnqueingKernelTh auto sshAfter = pSSH->getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, KernelFixture::pKernel); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*KernelFixture::pKernel); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *KernelFixture::pKernel); auto expectedSizeIOH = 
HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *KernelFixture::pKernel, workSize[0]); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*KernelFixture::pKernel, rootDeviceIndex); diff --git a/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp b/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp index d328bb5521..7942a3cf74 100644 --- a/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/get_size_required_image_tests.cpp @@ -95,7 +95,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenCopyingImageThenHeapsAndCommandBufferCons auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_COPY_IMAGE, false, false, *pCmdQ, kernel); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); @@ -125,12 +125,12 @@ HWTEST_F(GetSizeRequiredImageTest, WhenCopyingReadWriteImageThenHeapsAndCommandB EXPECT_NE(nullptr, kernel); // This kernel does not operate on OpenCL 2.0 Read and Write images - EXPECT_EQ(kernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages, (uint32_t) false); + EXPECT_EQ(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->UsesFencesForReadWriteImages, (uint32_t) false); // Simulate that the kernel actually operates on OpenCL 2.0 Read and Write images. 
// Such kernel may require special WA DisableLSQCROPERFforOCL during construction of Command Buffer - struct SPatchExecutionEnvironment *pExecEnv = (struct SPatchExecutionEnvironment *)kernel->getKernelInfo().patchInfo.executionEnvironment; + struct SPatchExecutionEnvironment *pExecEnv = (struct SPatchExecutionEnvironment *)kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment; pExecEnv->UsesFencesForReadWriteImages = (uint32_t) true; - EXPECT_EQ(kernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages, (uint32_t) true); + EXPECT_EQ(kernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->UsesFencesForReadWriteImages, (uint32_t) true); // Enqueue kernel that may require special WA DisableLSQCROPERFforOCL auto retVal = EnqueueKernelHelper<>::enqueueKernel(pCmdQ, kernel.get()); @@ -142,7 +142,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenCopyingReadWriteImageThenHeapsAndCommandB auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_COPY_IMAGE, false, false, *pCmdQ, kernel.get()); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel.get()); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *kernel.get()); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel.get()); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel.get(), rootDeviceIndex); @@ -199,7 +199,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenReadingImageNonBlockingThenHeapsAndComman auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_READ_IMAGE, false, false, *pCmdQ, kernel); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel); 
auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); @@ -254,7 +254,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenReadingImageBlockingThenHeapsAndCommandBu auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_READ_IMAGE, false, false, *pCmdQ, kernel); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); @@ -309,7 +309,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenWritingImageNonBlockingThenHeapsAndComman auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_WRITE_IMAGE, false, false, *pCmdQ, kernel); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); @@ -364,7 +364,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenWritingImageBlockingThenHeapsAndCommandBu auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_WRITE_IMAGE, false, false, *pCmdQ, kernel); - auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); + auto expectedSizeDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *kernel); auto expectedSizeIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel); auto expectedSizeSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); diff --git 
a/opencl/test/unit_test/command_queue/gl/windows/enqueue_kernel_gl_tests_windows.cpp b/opencl/test/unit_test/command_queue/gl/windows/enqueue_kernel_gl_tests_windows.cpp index 2eeaa4bff3..f217f83cea 100644 --- a/opencl/test/unit_test/command_queue/gl/windows/enqueue_kernel_gl_tests_windows.cpp +++ b/opencl/test/unit_test/command_queue/gl/windows/enqueue_kernel_gl_tests_windows.cpp @@ -42,7 +42,7 @@ TEST_F(EnqueueKernelTest, givenKernelWithSharedObjArgsWhenEnqueueIsCalledThenRes pKernel->setArg(0, sizeof(cl_mem *), &sharedMem); pKernel->setArg(1, sizeof(cl_mem *), &nonSharedMem); EXPECT_TRUE(pKernel->isUsingSharedObjArgs()); - auto &kernelInfo = pKernel->getKernelInfo(); + auto &kernelInfo = pKernel->getKernelInfo(rootDeviceIndex); auto pKernelArg = (uint32_t *)(pKernel->getCrossThreadData(rootDeviceIndex) + kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); diff --git a/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp b/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp index 68114b7d37..2d4fbbbe33 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp +++ b/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp @@ -680,7 +680,7 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); - snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); EXPECT_TRUE(containsHint(expectedHint, userData)); } 
@@ -692,7 +692,7 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); - snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); EXPECT_TRUE(containsHint(expectedHint, userData)); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); @@ -705,7 +705,7 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); - snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); EXPECT_TRUE(containsHint(expectedHint, userData)); DebugManager.flags.EnableComputeWorkSizeND.set(isWorkGroupSizeEnabled); @@ -716,7 +716,7 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); - snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, 
DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); EXPECT_TRUE(containsHint(expectedHint, userData)); } @@ -729,7 +729,7 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); - snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); EXPECT_TRUE(containsHint(expectedHint, userData)); } @@ -742,7 +742,7 @@ TEST_F(PerformanceHintEnqueueKernelTest, GivenNullLocalSizeAndEnableComputeWorkS retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkGroupSize, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); - snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[NULL_LOCAL_WORKGROUP_SIZE], kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), *kernel->localWorkSizeX, *kernel->localWorkSizeY, *kernel->localWorkSizeZ); 
EXPECT_TRUE(containsHint(expectedHint, userData)); } @@ -771,7 +771,7 @@ TEST_P(PerformanceHintEnqueueKernelBadSizeTest, GivenBadLocalWorkGroupSizeWhenEn EXPECT_EQ(CL_SUCCESS, retVal); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[BAD_LOCAL_WORKGROUP_SIZE], - localWorkGroupSize[0], localWorkGroupSize[1], localWorkGroupSize[2], kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), + localWorkGroupSize[0], localWorkGroupSize[1], localWorkGroupSize[2], kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), computedLocalWorkgroupSize.x, computedLocalWorkgroupSize.y, computedLocalWorkgroupSize.z); EXPECT_TRUE(containsHint(expectedHint, userData)); } @@ -787,7 +787,7 @@ TEST_F(PerformanceHintEnqueueKernelPrintfTest, GivenKernelWithPrintfWhenEnqueueK retVal = pCmdQ->enqueueKernel(kernel, 3, nullptr, globalWorkGroupSize, preferredWorkGroupSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); - snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PRINTF_DETECTED_IN_KERNEL], kernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PRINTF_DETECTED_IN_KERNEL], kernel->getDefaultKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); EXPECT_TRUE(containsHint(expectedHint, userData)); } @@ -821,7 +821,7 @@ TEST_F(PerformanceHintEnqueueTest, GivenKernelWithCoherentPtrWhenEnqueueKernelIs EXPECT_EQ(CL_SUCCESS, retVal); - snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_REQUIRES_COHERENCY], mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_REQUIRES_COHERENCY], 
mockKernel.mockKernel->getDefaultKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); EXPECT_TRUE(containsHint(expectedHint, userData)); delete buffer; } diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp index e4e1dc30be..aa10f64670 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp +++ b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp @@ -435,6 +435,7 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallF DebugManager.flags.PrintDriverDiagnostics.set(1); auto pDevice = castToObject(devices[0]); + auto rootDeviceIndex = pDevice->getRootDeviceIndex(); MockKernelWithInternals mockKernel(*pDevice, context); MockBuffer buffer; cl_mem clMem = &buffer; @@ -454,7 +455,7 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallF mockKernel.mockKernel->fillWithBuffersForAuxTranslation(memObjects); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_ARGUMENT_AUX_TRANSLATION], - mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), 0, mockKernel.mockKernel->getKernelInfo().kernelArgInfo.at(0).metadataExtended->argName.c_str()); + mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), 0, mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo.at(0).metadataExtended->argName.c_str()); std::string output = testing::internal::GetCapturedStdout(); EXPECT_NE(0u, output.size()); @@ -707,6 +708,7 @@ TEST_F(PerformanceHintTest, givenUncompressedImageWhenItsCreatedThenProperPerfor TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenContextProvidesProperHint) { auto pDevice = castToObject(devices[0]); + auto rootDeviceIndex = pDevice->getRootDeviceIndex(); auto size = zeroSized ? 
0 : 1024; MockKernelWithInternals mockKernel(*pDevice, context); SPatchMediaVFEState mediaVFEstate; @@ -714,17 +716,18 @@ TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenConte mediaVFEstate.PerThreadScratchSpace = size; mockKernel.kernelInfo.patchInfo.mediavfestate = &mediaVFEstate; - size *= pDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * mockKernel.mockKernel->getKernelInfo().getMaxSimdSize(); + size *= pDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(); mockKernel.mockKernel->initialize(); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[REGISTER_PRESSURE_TOO_HIGH], - mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), size); + mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), size); EXPECT_EQ(!zeroSized, containsHint(expectedHint, userData)); } TEST_P(PerformanceHintKernelTest, GivenPrivateSurfaceWhenKernelIsInitializedThenContextProvidesProperHint) { auto pDevice = castToObject(devices[1]); + auto rootDeviceIndex = pDevice->getRootDeviceIndex(); static_cast(pDevice->getMemoryManager())->turnOnFakingBigAllocations(); for (auto isSmitThread : {false, true}) { @@ -742,12 +745,12 @@ TEST_P(PerformanceHintKernelTest, GivenPrivateSurfaceWhenKernelIsInitializedThen mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrivateSurface = &allocateStatelessPrivateMemorySurface; size *= pDevice->getSharedDeviceInfo().computeUnitsUsedForScratch; - size *= isSmitThread ? mockKernel.mockKernel->getKernelInfo().getMaxSimdSize() : 1; + size *= isSmitThread ? 
mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize() : 1; mockKernel.mockKernel->initialize(); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[PRIVATE_MEMORY_USAGE_TOO_HIGH], - mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), size); + mockKernel.mockKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName.c_str(), size); EXPECT_EQ(!zeroSized, containsHint(expectedHint, userData)); } } diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.h b/opencl/test/unit_test/context/driver_diagnostics_tests.h index 8b648016e8..9612994af1 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_tests.h +++ b/opencl/test/unit_test/context/driver_diagnostics_tests.h @@ -233,6 +233,7 @@ struct PerformanceHintEnqueueKernelTest : public PerformanceHintEnqueueTest, kernel = Kernel::create(pProgram, pProgram->getKernelInfosForKernel("CopyBuffer"), &retVal); globalWorkGroupSize[0] = globalWorkGroupSize[1] = globalWorkGroupSize[2] = 1; + rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex(); } void TearDown() override { @@ -241,6 +242,7 @@ struct PerformanceHintEnqueueKernelTest : public PerformanceHintEnqueueTest, PerformanceHintEnqueueTest::TearDown(); } Kernel *kernel = nullptr; + uint32_t rootDeviceIndex = std::numeric_limits::max(); size_t globalWorkGroupSize[3]{}; }; diff --git a/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp b/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp index c86ee61aca..8f50bcef9a 100644 --- a/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp +++ b/opencl/test/unit_test/device_queue/device_queue_hw_tests.cpp @@ -326,7 +326,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, WhenBuildingSlbThenCleanupSectionIsC // 4 pages padding expected after cleanup section EXPECT_LE(4 * MemoryConstants::pageSize, slbMax - slbUsed); - if 
(mockParentKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { + if (mockParentKernel->getKernelInfo(testedRootDeviceIndex).patchInfo.executionEnvironment->UsesFencesForReadWriteImages) { cleanupSectionOffsetToParse += GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) / 2; } @@ -402,7 +402,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, GivenProfilingWhenBuildingSlbThenEmC auto pipeControlItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - if (mockParentKernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages && GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) > 0) { + if (mockParentKernel->getKernelInfo(testedRootDeviceIndex).patchInfo.executionEnvironment->UsesFencesForReadWriteImages && GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(mockParentKernel) > 0) { auto loadRegImmItor = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), loadRegImmItor); diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp index 2034ad813e..8a0d81a3ab 100644 --- a/opencl/test/unit_test/event/event_tests.cpp +++ b/opencl/test/unit_test/event/event_tests.cpp @@ -568,7 +568,7 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut MockKernelWithInternals mockKernelWithInternals(*pClDevice); auto pKernel = mockKernelWithInternals.mockKernel; - KernelInfo *kernelInfo = const_cast(&pKernel->getKernelInfo()); + KernelInfo *kernelInfo = const_cast(&pKernel->getKernelInfo(rootDeviceIndex)); kernelInfo->patchInfo.pAllocateStatelessPrintfSurface = pPrintfSurface; kernelInfo->patchInfo.stringDataMap.insert(std::make_pair(0, testString)); uint64_t crossThread[10]; diff --git a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp index 
2f738e70de..068f87ed5c 100644 --- a/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp +++ b/opencl/test/unit_test/execution_model/enqueue_execution_model_kernel_tests.cpp @@ -61,19 +61,19 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel); - auto graphicsAllocation = pKernel->getKernelInfo().getGraphicsAllocation(); + auto graphicsAllocation = pKernel->getKernelInfo(rootDeviceIndex).getGraphicsAllocation(); auto kernelIsaAddress = graphicsAllocation->getGpuAddressToPatch(); auto &hardwareInfo = pClDevice->getHardwareInfo(); auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (EngineHelpers::isCcs(pCmdQ->getGpgpuEngine().osContext->getEngineType()) && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { - kernelIsaAddress += pKernel->getKernelInfo().patchInfo.threadPayload->OffsetToSkipSetFFIDGP; + kernelIsaAddress += pKernel->getKernelInfo(rootDeviceIndex).patchInfo.threadPayload->OffsetToSkipSetFFIDGP; } pCmdQ->enqueueKernel(pKernel, 1, globalOffsets, workItems, workItems, 0, nullptr, nullptr); - if (pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") { + if (pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") { if (EncodeSurfaceState::doBindingTablePrefetch()) { EXPECT_NE(0u, idData[0].getSamplerCount()); } else { @@ -456,7 +456,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelEnqueueFixture, GivenParentKernelWhenEnq pCmdQ->enqueueKernel(parentKernel, 1, offset, gws, gws, 0, nullptr, nullptr); - const auto &patchInfo = parentKernel->getKernelInfo().patchInfo; + const auto &patchInfo = parentKernel->getKernelInfo(rootDeviceIndex).patchInfo; if (patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) { auto patchLocation = ptrOffset(reinterpret_cast(parentKernel->getCrossThreadData(rootDeviceIndex)), diff --git 
a/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp b/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp index 7ae097ab2d..25be22b78b 100644 --- a/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp +++ b/opencl/test/unit_test/execution_model/parent_kernel_dispatch_tests.cpp @@ -129,7 +129,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue auto &ssh = pCmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u); - EXPECT_LE(pKernel->getKernelInfo().heapInfo.SurfaceStateHeapSize, ssh.getMaxAvailableSpace()); + EXPECT_LE(pKernel->getKernelInfo(rootDeviceIndex).heapInfo.SurfaceStateHeapSize, ssh.getMaxAvailableSpace()); size_t minRequiredSize = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); size_t minRequiredSizeForEM = HardwareCommandsHelper::getSshSizeForExecutionModel(*pKernel, rootDeviceIndex); @@ -167,10 +167,10 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue size_t sshUsed = blockedCommandsData->ssh->getUsed(); size_t expectedSizeSSH = pKernel->getNumberOfBindingTableStates() * sizeof(RENDER_SURFACE_STATE) + - pKernel->getKernelInfo().patchInfo.bindingTableState->Count * sizeof(BINDING_TABLE_STATE) + + pKernel->getKernelInfo(rootDeviceIndex).patchInfo.bindingTableState->Count * sizeof(BINDING_TABLE_STATE) + UnitTestHelper::getDefaultSshUsage(); - if ((pKernel->requiresSshForBuffers()) || (pKernel->getKernelInfo().patchInfo.imageMemObjKernelArgs.size() > 0)) { + if ((pKernel->requiresSshForBuffers()) || (pKernel->getKernelInfo(rootDeviceIndex).patchInfo.imageMemObjKernelArgs.size() > 0)) { EXPECT_EQ(expectedSizeSSH, sshUsed); } @@ -342,7 +342,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, MockParentKernelDispatch, GivenUsedSSHHeapWhenParent // Assuming parent is not using SSH, this is becuase storing allocation on reuse list and allocating // new one by obtaining from reuse list returns the same allocation and heap buffer does not 
differ // If parent is not using SSH, then heap obtained has zero usage and the same buffer - ASSERT_EQ(0u, mockParentKernel->getKernelInfo().heapInfo.SurfaceStateHeapSize); + ASSERT_EQ(0u, mockParentKernel->getKernelInfo(rootDeviceIndex).heapInfo.SurfaceStateHeapSize); DispatchInfo dispatchInfo(pClDevice, mockParentKernel, 1, workItems, nullptr, globalOffsets); MultiDispatchInfo multiDispatchInfo(mockParentKernel); diff --git a/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp b/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp index 27a2a1f419..58cfdebab6 100644 --- a/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp +++ b/opencl/test/unit_test/execution_model/scheduler_dispatch_tests.cpp @@ -122,7 +122,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched uint32_t threadsPerWorkGroup = walker->getThreadWidthCounterMaximum(); - EXPECT_EQ(scheduler.getLws() / scheduler.getKernelInfo().getMaxSimdSize(), threadsPerWorkGroup); + EXPECT_EQ(scheduler.getLws() / scheduler.getKernelInfo(rootDeviceIndex).getMaxSimdSize(), threadsPerWorkGroup); numWorkgroupsProgrammed[0] = walker->getThreadGroupIdXDimension(); numWorkgroupsProgrammed[1] = walker->getThreadGroupIdYDimension(); @@ -149,7 +149,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched auto numChannels = 3; auto grfSize = pDevice->getHardwareInfo().capabilityTable.grfSize; - auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(scheduler.getKernelInfo().getMaxSimdSize(), grfSize, numChannels, scheduler.getLws()); + auto sizePerThreadDataTotal = PerThreadDataHelper::getPerThreadDataSizeTotal(scheduler.getKernelInfo(rootDeviceIndex).getMaxSimdSize(), grfSize, numChannels, scheduler.getLws()); auto sizeCrossThreadData = scheduler.getCrossThreadDataSize(rootDeviceIndex); auto IndirectDataLength = alignUp((uint32_t)(sizeCrossThreadData + sizePerThreadDataTotal), 
GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); diff --git a/opencl/test/unit_test/gtpin/gtpin_tests.cpp b/opencl/test/unit_test/gtpin/gtpin_tests.cpp index fe6a60d66f..5f41c77659 100644 --- a/opencl/test/unit_test/gtpin/gtpin_tests.cpp +++ b/opencl/test/unit_test/gtpin/gtpin_tests.cpp @@ -762,7 +762,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelIsExecutedThenGTPinCa EXPECT_EQ(prevCount11 + 1, KernelCreateCallbackCount); Kernel *pKernel1 = (Kernel *)kernel1; - const KernelInfo &kInfo1 = pKernel1->getKernelInfo(); + const KernelInfo &kInfo1 = pKernel1->getKernelInfo(rootDeviceIndex); uint64_t gtpinKernelId1 = pKernel1->getKernelId(); EXPECT_EQ(kInfo1.shaderHashCode, gtpinKernelId1); @@ -796,7 +796,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelIsExecutedThenGTPinCa EXPECT_EQ(prevCount21, KernelCreateCallbackCount); Kernel *pKernel2 = (Kernel *)kernel2; - const KernelInfo &kInfo2 = pKernel2->getKernelInfo(); + const KernelInfo &kInfo2 = pKernel2->getKernelInfo(rootDeviceIndex); uint64_t gtpinKernelId2 = pKernel2->getKernelId(); EXPECT_EQ(kInfo2.shaderHashCode, gtpinKernelId2); @@ -910,7 +910,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelINTELIsExecutedThenGT EXPECT_EQ(prevCount11 + 1, KernelCreateCallbackCount); Kernel *pKernel1 = (Kernel *)kernel1; - const KernelInfo &kInfo1 = pKernel1->getKernelInfo(); + const KernelInfo &kInfo1 = pKernel1->getKernelInfo(rootDeviceIndex); uint64_t gtpinKernelId1 = pKernel1->getKernelId(); EXPECT_EQ(kInfo1.shaderHashCode, gtpinKernelId1); @@ -950,7 +950,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelINTELIsExecutedThenGT EXPECT_EQ(prevCount21, KernelCreateCallbackCount); Kernel *pKernel2 = (Kernel *)kernel2; - const KernelInfo &kInfo2 = pKernel2->getKernelInfo(); + const KernelInfo &kInfo2 = pKernel2->getKernelInfo(rootDeviceIndex); uint64_t gtpinKernelId2 = pKernel2->getKernelId(); EXPECT_EQ(kInfo2.shaderHashCode, gtpinKernelId2); @@ -1266,7 +1266,7 @@ 
TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenG EXPECT_EQ(prevCount1 + 1, KernelCreateCallbackCount); Kernel *pKernel = (Kernel *)kernel; - const KernelInfo &kInfo = pKernel->getKernelInfo(); + const KernelInfo &kInfo = pKernel->getKernelInfo(rootDeviceIndex); uint64_t gtpinKernelId = pKernel->getKernelId(); EXPECT_EQ(kInfo.shaderHashCode, gtpinKernelId); @@ -1379,7 +1379,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenBlockedKernelWithoutSSHIsUs EXPECT_EQ(prevCount1 + 1, KernelCreateCallbackCount); Kernel *pKernel = (Kernel *)kernel; - const KernelInfo &kInfo = pKernel->getKernelInfo(); + const KernelInfo &kInfo = pKernel->getKernelInfo(rootDeviceIndex); uint64_t gtpinKernelId = pKernel->getKernelId(); EXPECT_EQ(kInfo.shaderHashCode, gtpinKernelId); @@ -1503,7 +1503,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenTheSameKerneIsExecutedTwice EXPECT_EQ(prevCount11 + 1, KernelCreateCallbackCount); Kernel *pKernel1 = (Kernel *)kernel1; - const KernelInfo &kInfo1 = pKernel1->getKernelInfo(); + const KernelInfo &kInfo1 = pKernel1->getKernelInfo(rootDeviceIndex); uint64_t gtpinKernelId1 = pKernel1->getKernelId(); EXPECT_EQ(kInfo1.shaderHashCode, gtpinKernelId1); @@ -1541,7 +1541,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenTheSameKerneIsExecutedTwice EXPECT_EQ(prevCount21, KernelCreateCallbackCount); Kernel *pKernel2 = (Kernel *)kernel2; - const KernelInfo &kInfo2 = pKernel2->getKernelInfo(); + const KernelInfo &kInfo2 = pKernel2->getKernelInfo(rootDeviceIndex); uint64_t gtpinKernelId2 = pKernel2->getKernelId(); EXPECT_EQ(kInfo2.shaderHashCode, gtpinKernelId2); @@ -2339,7 +2339,7 @@ TEST_F(GTPinTests, givenKernelThenVerifyThatKernelCodeSubstitutionWorksWell) { uint8_t *pBin2 = reinterpret_cast(const_cast(pKernel->getKernelHeap())); EXPECT_EQ(pBin2, &newCode[0]); - auto kernelIsa = pKernel->getKernelInfo().kernelAllocation->getUnderlyingBuffer(); + auto kernelIsa = 
pKernel->getKernelInfo(rootDeviceIndex).kernelAllocation->getUnderlyingBuffer(); EXPECT_EQ(0, memcmp(kernelIsa, newCode, newCodeSize)); diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index 42ca6bedc8..8305683a68 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -92,7 +92,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenProgramInterfaceDescriptor size_t crossThreadDataSize = kernel->getCrossThreadDataSize(rootDeviceIndex); HardwareCommandsHelper::sendInterfaceDescriptorData( - indirectHeap, 0, 0, crossThreadDataSize, 64, 0, 0, 0, 1, *kernel, 0, pDevice->getPreemptionMode(), nullptr, pDevice->getHardwareInfo()); + indirectHeap, 0, 0, crossThreadDataSize, 64, 0, 0, 0, 1, *kernel, 0, pDevice->getPreemptionMode(), nullptr, *pDevice); auto usedIndirectHeapAfter = indirectHeap.getUsed(); EXPECT_EQ(sizeof(INTERFACE_DESCRIPTOR_DATA), usedIndirectHeapAfter - usedIndirectHeapBefore); @@ -339,7 +339,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenAllocatingIndirectStateRes sizeof(INTERFACE_DESCRIPTOR_DATA)); uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*kernel); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*kernel, rootDeviceIndex); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -348,7 +348,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenAllocatingIndirectStateRes ssh, *kernel, kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), - kernel->getKernelInfo().getMaxSimdSize(), + kernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(), localWorkSizes, IDToffset, interfaceDescriptorIndex, @@ -363,7 +363,7 @@ 
HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenAllocatingIndirectStateRes auto usedAfterDSH = dsh.getUsed(); auto usedAfterIOH = ioh.getUsed(); auto usedAfterSSH = ssh.getUsed(); - auto sizeRequiredDSH = HardwareCommandsHelper::getSizeRequiredDSH(*kernel); + auto sizeRequiredDSH = HardwareCommandsHelper::getSizeRequiredDSH(rootDeviceIndex, *kernel); auto sizeRequiredIOH = HardwareCommandsHelper::getSizeRequiredIOH(rootDeviceIndex, *kernel, localWorkSize); auto sizeRequiredSSH = HardwareCommandsHelper::getSizeRequiredSSH(*kernel, rootDeviceIndex); @@ -394,7 +394,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl const size_t localWorkSizes[3]{localWorkSize, 1, 1}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -403,7 +403,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl ssh, *mockKernelWithInternal->mockKernel, mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), - mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), + mockKernelWithInternal->mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, @@ -442,7 +442,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelThatIsSchedulerWhen const size_t localWorkSizes[3]{localWorkSize, 1, 1}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); + auto 
kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -451,7 +451,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelThatIsSchedulerWhen ssh, *mockKernelWithInternal->mockKernel, mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), - mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), + mockKernelWithInternal->mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, @@ -484,7 +484,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable const size_t localWorkSizes[3]{localWorkSize, 1, 1}; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -493,7 +493,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable ssh, *mockKernelWithInternal->mockKernel, mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), - mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), + mockKernelWithInternal->mockKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, @@ -552,7 +552,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe dsh.getSpace(sizeof(INTERFACE_DESCRIPTOR_DATA)); KernelInfo modifiedKernelInfo = {}; - modifiedKernelInfo.patchInfo = kernel->getKernelInfo().patchInfo; + modifiedKernelInfo.patchInfo = 
kernel->getKernelInfo(rootDeviceIndex).patchInfo; modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[0] = 2; modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[1] = 1; modifiedKernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[2] = 0; @@ -564,7 +564,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe MockKernel mockKernel(kernel->getProgram(), kernelInfos, false); uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(mockKernel); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(mockKernel, rootDeviceIndex); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -640,7 +640,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenSendingIndirectStateThenBi auto sshUsed = ssh.getUsed(); // Obtain where the pointers will be stored - const auto &kernelInfo = kernel->getKernelInfo(); + const auto &kernelInfo = kernel->getKernelInfo(rootDeviceIndex); auto numSurfaceStates = kernelInfo.patchInfo.statelessGlobalMemObjKernelArgs.size() + kernelInfo.patchInfo.imageMemObjKernelArgs.size(); EXPECT_EQ(2u, numSurfaceStates); @@ -655,7 +655,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenSendingIndirectStateThenBi const_cast(kernelInfo).requiresSshForBuffers = true; uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*kernel); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*kernel, rootDeviceIndex); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -664,7 +664,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenSendingIndirectStateThenBi ssh, *kernel, kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, 
rootDeviceIndex), - kernel->getKernelInfo().getMaxSimdSize(), + kernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, @@ -815,7 +815,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh // push surfaces states and binding table to given ssh heap uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*pKernel); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*pKernel, rootDeviceIndex); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -824,7 +824,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh ssh, *pKernel, pKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), - pKernel->getKernelInfo().getMaxSimdSize(), + pKernel->getKernelInfo(rootDeviceIndex).getMaxSimdSize(), localWorkSizes, 0, interfaceDescriptorIndex, @@ -1012,7 +1012,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd mockKernelWithInternal->mockKernel->setSshLocal(mockKernelWithInternal->sshLocal, sizeof(mockKernelWithInternal->sshLocal), rootDeviceIndex); uint32_t interfaceDescriptorIndex = 0; auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType()); - auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex); HardwareCommandsHelper::sendIndirectState( commandStream, @@ -1132,19 +1132,19 @@ HWTEST_F(HardwareCommandsTest, givenEnabledPassInlineDataWhenKernelAllowsInlineT const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 1; mockKernelWithInternal->mockKernel->setCrossThreadData(crossThreadData, 
sizeof(crossThreadData)); - EXPECT_TRUE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel)); + EXPECT_TRUE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); } HWTEST_F(HardwareCommandsTest, givenNoDebugSettingsWhenDefaultModeIsExcercisedThenWeFollowKernelSettingForInlineProgramming) { const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 1; - EXPECT_TRUE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel)); + EXPECT_TRUE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); } HWTEST_F(HardwareCommandsTest, givenDisabledPassInlineDataWhenKernelAllowsInlineThenReturnFalse) { DebugManagerStateRestore restore; DebugManager.flags.EnablePassInlineData.set(0u); const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 1; - EXPECT_FALSE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel)); + EXPECT_FALSE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); } HWTEST_F(HardwareCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInlineThenReturnFalse) { @@ -1156,7 +1156,7 @@ HWTEST_F(HardwareCommandsTest, givenEnabledPassInlineDataWhenKernelDisallowsInli const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->PassInlineData = 0; mockKernelWithInternal->mockKernel->setCrossThreadData(crossThreadData, sizeof(crossThreadData)); - EXPECT_FALSE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel)); + EXPECT_FALSE(HardwareCommandsHelper::inlineDataProgrammingRequired(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); } HWTEST_F(HardwareCommandsTest, whenLocalIdxInXDimPresentThenExpectLocalIdsInUseIsTrue) { @@ -1164,7 +1164,7 @@ HWTEST_F(HardwareCommandsTest, 
whenLocalIdxInXDimPresentThenExpectLocalIdsInUseI const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0; const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0; - EXPECT_TRUE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel)); + EXPECT_TRUE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); } HWTEST_F(HardwareCommandsTest, whenLocalIdxInYDimPresentThenExpectLocalIdsInUseIsTrue) { @@ -1172,7 +1172,7 @@ HWTEST_F(HardwareCommandsTest, whenLocalIdxInYDimPresentThenExpectLocalIdsInUseI const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 1; const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 0; - EXPECT_TRUE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel)); + EXPECT_TRUE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); } HWTEST_F(HardwareCommandsTest, whenLocalIdxInZDimPresentThenExpectLocalIdsInUseIsTrue) { @@ -1180,7 +1180,7 @@ HWTEST_F(HardwareCommandsTest, whenLocalIdxInZDimPresentThenExpectLocalIdsInUseI const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0; const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent = 1; - EXPECT_TRUE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel)); + EXPECT_TRUE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); } HWTEST_F(HardwareCommandsTest, whenLocalIdxAreNotPresentThenExpectLocalIdsInUseIsFalse) { @@ -1188,7 +1188,7 @@ HWTEST_F(HardwareCommandsTest, whenLocalIdxAreNotPresentThenExpectLocalIdsInUseI const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDYPresent = 0; const_cast(mockKernelWithInternal->kernelInfo.patchInfo.threadPayload)->LocalIDZPresent 
= 0; - EXPECT_FALSE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel)); + EXPECT_FALSE(HardwareCommandsHelper::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel, rootDeviceIndex)); } HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnabledWhenProgramGlobalSurfacePresentThenExpectCacheFlushCommand) { diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h index 731abf1056..3506f46d92 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h @@ -45,7 +45,7 @@ struct HardwareCommandsTest : ClDeviceFixture, template size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) { - return EncodeSurfaceState::pushBindingTableAndSurfaceStates(dstHeap, (srcKernel.getKernelInfo().patchInfo.bindingTableState != nullptr) ? srcKernel.getKernelInfo().patchInfo.bindingTableState->Count : 0, + return EncodeSurfaceState::pushBindingTableAndSurfaceStates(dstHeap, (srcKernel.getKernelInfo(rootDeviceIndex).patchInfo.bindingTableState != nullptr) ? 
srcKernel.getKernelInfo(rootDeviceIndex).patchInfo.bindingTableState->Count : 0, srcKernel.getSurfaceStateHeap(rootDeviceIndex), srcKernel.getSurfaceStateHeapSize(rootDeviceIndex), srcKernel.getNumberOfBindingTableStates(), srcKernel.getBindingTableOffset()); } diff --git a/opencl/test/unit_test/helpers/task_information_tests.cpp b/opencl/test/unit_test/helpers/task_information_tests.cpp index e4b7835562..22bcb3dec2 100644 --- a/opencl/test/unit_test/helpers/task_information_tests.cpp +++ b/opencl/test/unit_test/helpers/task_information_tests.cpp @@ -233,13 +233,14 @@ HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectD } std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1)); command->submit(20, false); + auto rootDeviceIndex = mockCsr->getRootDeviceIndex(); EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.specialPipelineSelectMode); EXPECT_EQ(kernel.mockKernel->isVmeKernel(), mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired); EXPECT_EQ(mockCmdQ->flushStamp->getStampReference(), mockCsr->passedDispatchFlags.flushStampReference); EXPECT_EQ(mockCmdQ->getThrottle(), mockCsr->passedDispatchFlags.throttle); EXPECT_EQ(preemptionMode, mockCsr->passedDispatchFlags.preemptionMode); - EXPECT_EQ(kernel.mockKernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired, mockCsr->passedDispatchFlags.numGrfRequired); + EXPECT_EQ(kernel.mockKernel->getKernelInfo(rootDeviceIndex).patchInfo.executionEnvironment->NumGRFRequired, mockCsr->passedDispatchFlags.numGrfRequired); EXPECT_EQ(L3CachingSettings::l3CacheOn, mockCsr->passedDispatchFlags.l3CacheSettings); EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking); EXPECT_EQ(flushDC, mockCsr->passedDispatchFlags.dcFlush); diff --git a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp index 9ecbdba95e..45dc098143 
100644 --- a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp @@ -192,7 +192,7 @@ TEST_F(CloneKernelTest, GivenArgBufferWhenCloningKernelThenKernelInfoIsCorrect) EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched); auto pKernelArg = (cl_mem *)(pClonedKernel->getCrossThreadData(rootDeviceIndex) + - pClonedKernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + pClonedKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(buffer.getCpuAddress(), *pKernelArg); } @@ -224,7 +224,7 @@ TEST_F(CloneKernelTest, GivenArgPipeWhenCloningKernelThenKernelInfoIsCorrect) { EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched); auto pKernelArg = (cl_mem *)(pClonedKernel->getCrossThreadData(rootDeviceIndex) + - pClonedKernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + pClonedKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(pipe.getCpuAddress(), *pKernelArg); } @@ -265,7 +265,7 @@ TEST_F(CloneKernelTest, GivenArgImageWhenCloningKernelThenKernelInfoIsCorrect) { auto crossThreadData = reinterpret_cast(pClonedKernel->getCrossThreadData(rootDeviceIndex)); EXPECT_EQ(objectId, *crossThreadData); - const auto &argInfo = pClonedKernel->getKernelInfo().kernelArgInfo[0]; + const auto &argInfo = pClonedKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0]; auto pImgWidth = ptrOffset(crossThreadData, argInfo.offsetImgWidth); EXPECT_EQ(imageWidth, *pImgWidth); @@ -316,7 +316,7 @@ TEST_F(CloneKernelTest, GivenArgAcceleratorWhenCloningKernelThenKernelInfoIsCorr auto crossThreadData = reinterpret_cast(pClonedKernel->getCrossThreadData(rootDeviceIndex)); - const auto &argInfo = pClonedKernel->getKernelInfo().kernelArgInfo[0]; + const auto &argInfo 
= pClonedKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0]; uint32_t *pMbBlockType = ptrOffset(crossThreadData, argInfo.offsetVmeMbBlockType); EXPECT_EQ(desc.mb_block_type, *pMbBlockType); @@ -370,7 +370,7 @@ TEST_F(CloneKernelTest, GivenArgSamplerWhenCloningKernelThenKernelInfoIsCorrect) auto crossThreadData = reinterpret_cast(pClonedKernel->getCrossThreadData(rootDeviceIndex)); EXPECT_EQ(objectId, *crossThreadData); - const auto &argInfo = pClonedKernel->getKernelInfo().kernelArgInfo[0]; + const auto &argInfo = pClonedKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0]; auto pSnapWa = ptrOffset(crossThreadData, argInfo.offsetSamplerSnapWa); EXPECT_EQ(sampler->getSnapWaValue(), *pSnapWa); @@ -417,7 +417,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CloneKernelTest, GivenArgDeviceQueueWhenCloningKerne EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched); auto pKernelArg = (uintptr_t *)(pClonedKernel->getCrossThreadData(rootDeviceIndex) + - pClonedKernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + pClonedKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(static_cast(mockDevQueue.getQueueBuffer()->getGpuAddressToPatch()), *pKernelArg); } @@ -445,7 +445,7 @@ TEST_F(CloneKernelTest, GivenArgSvmWhenCloningKernelThenKernelInfoIsCorrect) { EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched); auto pKernelArg = (void **)(pClonedKernel->getCrossThreadData(rootDeviceIndex) + - pClonedKernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + pClonedKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(svmPtr, *pKernelArg); delete[] svmPtr; @@ -476,7 +476,7 @@ TEST_F(CloneKernelTest, GivenArgSvmAllocWhenCloningKernelThenKernelInfoIsCorrect 
EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched); auto pKernelArg = (void **)(pClonedKernel->getCrossThreadData(rootDeviceIndex) + - pClonedKernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + pClonedKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(svmPtr, *pKernelArg); delete[] svmPtr; @@ -507,7 +507,7 @@ TEST_F(CloneKernelTest, GivenArgImmediateWhenCloningKernelThenKernelInfoIsCorrec EXPECT_EQ(pSourceKernel->getKernelArgInfo(0).isPatched, pClonedKernel->getKernelArgInfo(0).isPatched); auto pKernelArg = (TypeParam *)(pClonedKernel->getCrossThreadData(rootDeviceIndex) + - pClonedKernel->getKernelInfo().kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); + pClonedKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(value, *pKernelArg); } diff --git a/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp b/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp index 8a0cd48652..e3ac703b71 100644 --- a/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_image_arg_tests.cpp @@ -66,16 +66,16 @@ TEST_F(KernelImageArgTest, givenKernelWithFlatImageTokensWhenArgIsSetThenPatchAl auto crossThreadData = reinterpret_cast(pKernel->getCrossThreadData(rootDeviceIndex)); auto pixelSize = image->getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes; - auto offsetFlatBaseOffset = ptrOffset(crossThreadData, pKernel->getKernelInfo().kernelArgInfo[0].offsetFlatBaseOffset); + auto offsetFlatBaseOffset = ptrOffset(crossThreadData, pKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].offsetFlatBaseOffset); EXPECT_EQ(imageBaseAddress, *reinterpret_cast(offsetFlatBaseOffset)); - auto offsetFlatWidth = ptrOffset(crossThreadData, pKernel->getKernelInfo().kernelArgInfo[0].offsetFlatWidth); + auto 
offsetFlatWidth = ptrOffset(crossThreadData, pKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].offsetFlatWidth); EXPECT_EQ(static_cast((imageWidth * pixelSize) - 1), *offsetFlatWidth); - auto offsetFlatHeight = ptrOffset(crossThreadData, pKernel->getKernelInfo().kernelArgInfo[0].offsetFlatHeight); + auto offsetFlatHeight = ptrOffset(crossThreadData, pKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].offsetFlatHeight); EXPECT_EQ(static_cast((imageHeight * pixelSize) - 1), *offsetFlatHeight); - auto offsetFlatPitch = ptrOffset(crossThreadData, pKernel->getKernelInfo().kernelArgInfo[0].offsetFlatPitch); + auto offsetFlatPitch = ptrOffset(crossThreadData, pKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo[0].offsetFlatPitch); EXPECT_EQ(imageRowPitch - 1, *offsetFlatPitch); } diff --git a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp index 2b8ea9442a..fb68a71367 100644 --- a/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_reflection_surface_tests.cpp @@ -771,7 +771,7 @@ TEST_P(KernelReflectionSurfaceTest, WhenCreatingKernelReflectionSurfaceThenKerne size_t parentImageCount = 0; size_t parentSamplerCount = 0; - if (pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") { + if (pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") { parentImageCount = 1; parentSamplerCount = 1; } @@ -824,7 +824,7 @@ TEST_P(KernelReflectionSurfaceTest, WhenCreatingKernelReflectionSurfaceThenKerne uint32_t parentImages = 0; uint32_t parentSamplers = 0; - if (pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") { + if (pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") { parentImages = 1; parentSamplers = 1; EXPECT_LT(sizeof(IGIL_KernelDataHeader), 
pKernelHeader->m_ParentSamplerParamsOffset); @@ -1114,7 +1114,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, WhenObtainingK cl_sampler samplerCl = sampler.get(); cl_mem imageCl = image3d.get(); - if (pKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") { + if (pKernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.kernelMetadata.kernelName == "kernel_reflection") { pKernel->setArgSampler(0, sizeof(cl_sampler), &samplerCl); pKernel->setArgImage(1, sizeof(cl_mem), &imageCl); } @@ -1139,7 +1139,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, WhenObtainingK if (pKernelHeader->m_ParentKernelImageCount > 0) { uint32_t imageIndex = 0; - for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) { + for (const auto &arg : pKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo) { if (arg.isImage) { EXPECT_EQ(arg.offsetHeap, pParentImageParams[imageIndex].m_ObjectID); imageIndex++; @@ -1149,7 +1149,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelReflectionSurfaceWithQueueTest, WhenObtainingK if (pKernelHeader->m_ParentSamplerCount > 0) { uint32_t samplerIndex = 0; - for (const auto &arg : pKernel->getKernelInfo().kernelArgInfo) { + for (const auto &arg : pKernel->getKernelInfo(rootDeviceIndex).kernelArgInfo) { if (arg.isSampler) { EXPECT_EQ(OCLRT_ARG_OFFSET_TO_SAMPLER_OBJECT_ID(arg.offsetHeap), pParentSamplerParams[samplerIndex].m_ObjectID); samplerIndex++; diff --git a/opencl/test/unit_test/kernel/kernel_slm_tests.cpp b/opencl/test/unit_test/kernel/kernel_slm_tests.cpp index 222b3ee928..da48863d11 100644 --- a/opencl/test/unit_test/kernel/kernel_slm_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_slm_tests.cpp @@ -94,7 +94,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, KernelSLMAndBarrierTest, GivenStaticSlmSizeWhenProgr 4u, pDevice->getPreemptionMode(), nullptr, - pDevice->getHardwareInfo()); + *pDevice); // add the heap base + offset uint32_t *pIdData = (uint32_t *)indirectHeap.getCpuBase() + 
offsetInterfaceDescriptorData; diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 532ec4ef14..dbf1770e8e 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -97,8 +97,8 @@ TEST(KernelTest, WhenKernelIsCreatedThenCorrectMembersAreMemObjects) { } TEST_F(KernelTests, WhenKernelIsCreatedThenKernelHeapIsCorrect) { - EXPECT_EQ(pKernel->getKernelInfo().heapInfo.pKernelHeap, pKernel->getKernelHeap()); - EXPECT_EQ(pKernel->getKernelInfo().heapInfo.KernelHeapSize, pKernel->getKernelHeapSize()); + EXPECT_EQ(pKernel->getKernelInfo(rootDeviceIndex).heapInfo.pKernelHeap, pKernel->getKernelHeap()); + EXPECT_EQ(pKernel->getKernelInfo(rootDeviceIndex).heapInfo.KernelHeapSize, pKernel->getKernelHeapSize()); } TEST_F(KernelTests, GivenInvalidParamNameWhenGettingInfoThenInvalidValueErrorIsReturned) { @@ -210,7 +210,7 @@ TEST_F(KernelTests, givenBinaryWhenItIsQueriedForGpuAddressThenAbsoluteAddressIs &paramValueSizeRet); EXPECT_EQ(CL_SUCCESS, retVal); - auto expectedGpuAddress = GmmHelper::decanonize(pKernel->getKernelInfo().kernelAllocation->getGpuAddress()); + auto expectedGpuAddress = GmmHelper::decanonize(pKernel->getKernelInfo(rootDeviceIndex).kernelAllocation->getGpuAddress()); EXPECT_EQ(expectedGpuAddress, paramValue); EXPECT_EQ(paramValueSize, paramValueSizeRet); } @@ -1642,7 +1642,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenKernelIsaIs EXPECT_EQ(0u, commandStreamReceiver.makeResidentAllocations.size()); pKernel->makeResident(pDevice->getGpgpuCommandStreamReceiver()); EXPECT_EQ(1u, commandStreamReceiver.makeResidentAllocations.size()); - EXPECT_TRUE(commandStreamReceiver.isMadeResident(pKernel->getKernelInfo().getGraphicsAllocation())); + EXPECT_TRUE(commandStreamReceiver.isMadeResident(pKernel->getKernelInfo(rootDeviceIndex).getGraphicsAllocation())); memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation); } @@ -3120,14
+3120,14 @@ TEST(KernelTest, givenKernelWhenForcePerDssBackedBufferProgrammingIsSetThenKerne auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); - EXPECT_TRUE(kernel.mockKernel->requiresPerDssBackedBuffer()); + EXPECT_TRUE(kernel.mockKernel->requiresPerDssBackedBuffer(device->getRootDeviceIndex())); } TEST(KernelTest, givenKernelWhenForcePerDssBackedBufferProgrammingIsNotSetThenKernelDoesntRequirePerDssBackedBuffer) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockKernelWithInternals kernel(*device); - EXPECT_FALSE(kernel.mockKernel->requiresPerDssBackedBuffer()); + EXPECT_FALSE(kernel.mockKernel->requiresPerDssBackedBuffer(device->getRootDeviceIndex())); } TEST(KernelTest, whenKernelIsInitializedThenThreadArbitrationPolicyIsSetToDefaultValue) { diff --git a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp b/opencl/test/unit_test/kernel/parent_kernel_tests.cpp index e1efa862a2..41f08dfec4 100644 --- a/opencl/test/unit_test/kernel/parent_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/parent_kernel_tests.cpp @@ -71,7 +71,7 @@ TEST(ParentKernelTest, WhenPatchingBlocksSimdSizeThenPatchIsAppliedCorrectly) { parentKernel->patchBlocksSimdSize(rootDeviceIndex); - void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(rootDeviceIndex), parentKernel->getKernelInfo().childrenKernelsIdOffset[0].second); + void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(rootDeviceIndex), parentKernel->getKernelInfo(rootDeviceIndex).childrenKernelsIdOffset[0].second); uint32_t *simdSize = reinterpret_cast(blockSimdSize); EXPECT_EQ(program->blockKernelManager->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); @@ -82,7 +82,8 @@ TEST(ParentKernelTest, GivenParentKernelWhenCheckingForDeviceEnqueueThenTrueIsRe MockContext context(&device); std::unique_ptr parentKernel(MockParentKernel::create(context)); - 
EXPECT_TRUE(parentKernel->getKernelInfo().hasDeviceEnqueue()); + auto rootDeviceIndex = device.getRootDeviceIndex(); + EXPECT_TRUE(parentKernel->getKernelInfo(rootDeviceIndex).hasDeviceEnqueue()); } TEST(ParentKernelTest, GivenNormalKernelWhenCheckingForDeviceEnqueueThenFalseIsReturned) { @@ -101,7 +102,7 @@ TEST(ParentKernelTest, WhenInitializingParentKernelThenBlocksSimdSizeIsPatched) parentKernel->initialize(); - void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(rootDeviceIndex), parentKernel->getKernelInfo().childrenKernelsIdOffset[0].second); + void *blockSimdSize = ptrOffset(parentKernel->getCrossThreadData(rootDeviceIndex), parentKernel->getKernelInfo(rootDeviceIndex).childrenKernelsIdOffset[0].second); uint32_t *simdSize = reinterpret_cast(blockSimdSize); EXPECT_EQ(program->blockKernelManager->getBlockKernelInfo(0)->getMaxSimdSize(), *simdSize); diff --git a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp index 8c7c941d9d..f95c807977 100644 --- a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp +++ b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp @@ -540,7 +540,7 @@ TEST_F(MemoryAllocatorTest, givenStatelessKernelWithPrintfWhenPrintfSurfaceIsCre auto allocationAddress = printfAllocation->getGpuAddressToPatch(); auto printfPatchAddress = ptrOffset(reinterpret_cast(kernel.mockKernel->getCrossThreadData(rootDeviceIndex)), - kernel.mockKernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->DataParamOffset); + kernel.mockKernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateStatelessPrintfSurface->DataParamOffset); EXPECT_EQ(allocationAddress, *(uintptr_t *)printfPatchAddress); @@ -550,7 +550,8 @@ TEST_F(MemoryAllocatorTest, givenStatelessKernelWithPrintfWhenPrintfSurfaceIsCre } HWTEST_F(MemoryAllocatorTest, givenStatefulKernelWithPrintfWhenPrintfSurfaceIsCreatedThenPrintfSurfaceIsPatchedWithCpuAddress) { - auto device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto rootDeviceIndex = 1u; + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), rootDeviceIndex)); MockKernelWithInternals kernel(*device); MockMultiDispatchInfo multiDispatchInfo(device.get(), kernel.mockKernel); SPatchAllocateStatelessPrintfSurface printfSurface; @@ -580,7 +581,7 @@ HWTEST_F(MemoryAllocatorTest, givenStatefulKernelWithPrintfWhenPrintfSurfaceIsCr typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; auto surfaceState = reinterpret_cast( ptrOffset(kernel.mockKernel->getSurfaceStateHeap(device->getRootDeviceIndex()), - kernel.mockKernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->SurfaceStateHeapOffset)); + kernel.mockKernel->getKernelInfo(rootDeviceIndex).patchInfo.pAllocateStatelessPrintfSurface->SurfaceStateHeapOffset)); auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); EXPECT_EQ(allocationAddress, surfaceAddress); diff --git a/opencl/test/unit_test/program/program_tests.cpp b/opencl/test/unit_test/program/program_tests.cpp index 5f7fdb96f0..5512c128fd 100644 --- a/opencl/test/unit_test/program/program_tests.cpp +++ b/opencl/test/unit_test/program/program_tests.cpp @@ -1367,7 +1367,7 @@ HWTEST_F(PatchTokenTests, givenKernelRequiringConstantAllocationWhenMakeResident auto &residencyVector = pCommandStreamReceiver->getResidencyAllocations(); //we expect kernel ISA here and constant allocation - auto kernelIsa = pKernel->getKernelInfo().getGraphicsAllocation(); + auto kernelIsa = pKernel->getKernelInfo(rootDeviceIndex).getGraphicsAllocation(); auto constantAllocation = pProgram->getConstantSurface(pDevice->getRootDeviceIndex()); auto element = std::find(residencyVector.begin(), residencyVector.end(), kernelIsa); diff --git a/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp 
b/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp index 9f2c618b01..96a0c1bfca 100644 --- a/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp +++ b/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp @@ -104,27 +104,28 @@ HWTEST_F(EnqueueBufferWindowsTest, givenMisalignedHostPtrWhenEnqueueReadBufferCa cmdQ->finish(); parseCommands(*cmdQ); + auto &kernelInfo = kernel->getKernelInfo(rootDeviceIndex); if (hwInfo->capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress) { const auto &surfaceStateDst = getSurfaceState(&cmdQ->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0), 1); - if (kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { + if (kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint64_t)) { auto pKernelArg = (uint64_t *)(kernel->getCrossThreadData(rootDeviceIndex) + - kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); + kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(alignDown(gpuVa, 4), static_cast(*pKernelArg)); EXPECT_EQ(*pKernelArg, surfaceStateDst.getSurfaceBaseAddress()); - } else if (kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { + } else if (kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { auto pKernelArg = (uint32_t *)(kernel->getCrossThreadData(rootDeviceIndex) + - kernel->getKernelInfo().kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); + kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(alignDown(gpuVa, 4), static_cast(*pKernelArg)); EXPECT_EQ(static_cast(*pKernelArg), surfaceStateDst.getSurfaceBaseAddress()); } } - if (kernel->getKernelInfo().kernelArgInfo[3].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { + if 
(kernelInfo.kernelArgInfo[3].kernelArgPatchInfoVector[0].size == sizeof(uint32_t)) { auto dstOffset = (uint32_t *)(kernel->getCrossThreadData(rootDeviceIndex) + - kernel->getKernelInfo().kernelArgInfo[3].kernelArgPatchInfoVector[0].crossthreadOffset); + kernelInfo.kernelArgInfo[3].kernelArgPatchInfoVector[0].crossthreadOffset); EXPECT_EQ(ptrDiff(misalignedPtr, alignDown(misalignedPtr, 4)), *dstOffset); } else { // dstOffset arg should be 4 bytes in size, if that changes, above if path should be modified diff --git a/shared/source/command_stream/preemption.cpp b/shared/source/command_stream/preemption.cpp index f0db6a1859..786ba66eac 100644 --- a/shared/source/command_stream/preemption.cpp +++ b/shared/source/command_stream/preemption.cpp @@ -60,13 +60,14 @@ PreemptionMode PreemptionHelper::taskPreemptionMode(PreemptionMode devicePreempt void PreemptionHelper::setPreemptionLevelFlags(PreemptionFlags &flags, Device &device, Kernel *kernel) { if (kernel) { + const auto &kernelInfo = kernel->getKernelInfo(device.getRootDeviceIndex()); flags.flags.disabledMidThreadPreemptionKernel = - kernel->getKernelInfo().patchInfo.executionEnvironment && - kernel->getKernelInfo().patchInfo.executionEnvironment->DisableMidThreadPreemption; + kernelInfo.patchInfo.executionEnvironment && + kernelInfo.patchInfo.executionEnvironment->DisableMidThreadPreemption; flags.flags.vmeKernel = kernel->isVmeKernel(); flags.flags.usesFencesForReadWriteImages = - kernel->getKernelInfo().patchInfo.executionEnvironment && - kernel->getKernelInfo().patchInfo.executionEnvironment->UsesFencesForReadWriteImages; + kernelInfo.patchInfo.executionEnvironment && + kernelInfo.patchInfo.executionEnvironment->UsesFencesForReadWriteImages; flags.flags.schedulerKernel = kernel->isSchedulerKernel; } flags.flags.deviceSupportsVmePreemption = device.getDeviceInfo().vmeAvcSupportsPreemption; diff --git a/shared/test/unit_test/cmd_parse/hw_parse.inl b/shared/test/unit_test/cmd_parse/hw_parse.inl index 
45cbe31e4e..30b62f74ed 100644 --- a/shared/test/unit_test/cmd_parse/hw_parse.inl +++ b/shared/test/unit_test/cmd_parse/hw_parse.inl @@ -115,7 +115,7 @@ const void *HardwareParse::getStatelessArgumentPointer(const Kernel &kernel, uin offsetCrossThreadData); // Determine where the argument is - auto &patchInfo = kernel.getKernelInfo().patchInfo; + auto &patchInfo = kernel.getDefaultKernelInfo().patchInfo; for (auto &arg : patchInfo.statelessGlobalMemObjKernelArgs) { if (arg->ArgumentNumber == indexArg) { return ptrOffset(pCrossThreadData, arg->DataParamOffset);