Store single KernelInfo in Kernel

remove root device index from Kernel's methods

Related-To: NEO-5001
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
2025-12-25 05:24:02 +08:00 · 2021-03-22 15:26:03 +00:00
parent ecceddcab6
commit 7098e9c5f2
136 changed files with 1043 additions and 1192 deletions
--- a/opencl/source/helpers/task_information.cpp
+++ b/opencl/source/helpers/task_information.cpp
@@ -168,7 +168,6 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
        printfHandler.get()->makeResident(commandStreamReceiver);
    }
    makeTimestampPacketsResident(commandStreamReceiver);
-    auto rootDeviceIndex = commandQueue.getDevice().getRootDeviceIndex();

    if (executionModelKernel) {
        uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
@@ -213,38 +212,38 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
                                                           commandQueue.getGpgpuCommandStreamReceiver(), bcsCsr);
    }

-    const auto &kernelDescriptor = kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor;
+    const auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;

    auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(kernel->isAuxTranslationRequired());

    DispatchFlags dispatchFlags(
-        {},                                                                                      //csrDependencies
-        nullptr,                                                                                 //barrierTimestampPacketNodes
-        {false, kernel->isVmeKernel()},                                                          //pipelineSelectArgs
-        commandQueue.flushStamp->getStampReference(),                                            //flushStampReference
-        commandQueue.getThrottle(),                                                              //throttle
-        preemptionMode,                                                                          //preemptionMode
-        kernelDescriptor.kernelAttributes.numGrfRequired,                                        //numGrfRequired
-        L3CachingSettings::l3CacheOn,                                                            //l3CacheSettings
-        kernel->getThreadArbitrationPolicy(),                                                    //threadArbitrationPolicy
-        kernel->getAdditionalKernelExecInfo(),                                                   //additionalKernelExecInfo
-        kernel->getExecutionType(),                                                              //kernelExecutionType
-        memoryCompressionState,                                                                  //memoryCompressionState
-        commandQueue.getSliceCount(),                                                            //sliceCount
-        true,                                                                                    //blocking
-        flushDC,                                                                                 //dcFlush
-        slmUsed,                                                                                 //useSLM
-        true,                                                                                    //guardCommandBufferWithPipeControl
-        NDRangeKernel,                                                                           //GSBA32BitRequired
-        requiresCoherency,                                                                       //requiresCoherency
-        commandQueue.getPriority() == QueuePriority::LOW,                                        //lowPriority
-        false,                                                                                   //implicitFlush
-        commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(),             //outOfOrderExecutionAllowed
-        false,                                                                                   //epilogueRequired
-        kernel->requiresPerDssBackedBuffer(rootDeviceIndex),                                     //usePerDssBackedBuffer
-        kernel->isSingleSubdevicePreferred(),                                                    //useSingleSubdevice
-        kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
-        kernel->getTotalNumDevicesInContext());                                                  //numDevicesInContext
+        {},                                                                               //csrDependencies
+        nullptr,                                                                          //barrierTimestampPacketNodes
+        {false, kernel->isVmeKernel()},                                                   //pipelineSelectArgs
+        commandQueue.flushStamp->getStampReference(),                                     //flushStampReference
+        commandQueue.getThrottle(),                                                       //throttle
+        preemptionMode,                                                                   //preemptionMode
+        kernelDescriptor.kernelAttributes.numGrfRequired,                                 //numGrfRequired
+        L3CachingSettings::l3CacheOn,                                                     //l3CacheSettings
+        kernel->getThreadArbitrationPolicy(),                                             //threadArbitrationPolicy
+        kernel->getAdditionalKernelExecInfo(),                                            //additionalKernelExecInfo
+        kernel->getExecutionType(),                                                       //kernelExecutionType
+        memoryCompressionState,                                                           //memoryCompressionState
+        commandQueue.getSliceCount(),                                                     //sliceCount
+        true,                                                                             //blocking
+        flushDC,                                                                          //dcFlush
+        slmUsed,                                                                          //useSLM
+        true,                                                                             //guardCommandBufferWithPipeControl
+        NDRangeKernel,                                                                    //GSBA32BitRequired
+        requiresCoherency,                                                                //requiresCoherency
+        commandQueue.getPriority() == QueuePriority::LOW,                                 //lowPriority
+        false,                                                                            //implicitFlush
+        commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(),      //outOfOrderExecutionAllowed
+        false,                                                                            //epilogueRequired
+        kernel->requiresPerDssBackedBuffer(),                                             //usePerDssBackedBuffer
+        kernel->isSingleSubdevicePreferred(),                                             //useSingleSubdevice
+        kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
+        kernel->getTotalNumDevicesInContext());                                           //numDevicesInContext

    if (timestampPacketDependencies) {
        eventsRequest.fillCsrDependencies(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);