From b85a8ace68e8b30a2ca533ab7076788dcc8cb406 Mon Sep 17 00:00:00 2001 From: Mateusz Jablonski Date: Tue, 24 Nov 2020 16:07:54 +0000 Subject: [PATCH] Pass root device index to Kernel's methods Related-To: NEO-5001 Signed-off-by: Mateusz Jablonski --- opencl/source/command_queue/enqueue_common.h | 2 +- opencl/source/command_queue/gpgpu_walker.h | 2 +- .../command_queue/gpgpu_walker_base.inl | 2 +- .../command_queue/gpgpu_walker_bdw_plus.inl | 4 +-- .../hardware_interface_bdw_plus.inl | 2 +- .../source/command_queue/local_work_size.cpp | 2 +- .../source/gen12lp/gpgpu_walker_gen12lp.cpp | 6 ++-- opencl/source/kernel/kernel.cpp | 23 ++++++++------- opencl/source/kernel/kernel.h | 16 +++++++---- opencl/source/kernel/kernel_extra.cpp | 2 +- .../accelerators/media_image_arg_tests.cpp | 4 +-- .../command_queue/enqueue_svm_tests.cpp | 4 +-- .../unit_test/gen11/kernel_tests_gen11.cpp | 2 +- .../gen12lp/gpgpu_walker_tests_gen12lp.cpp | 6 ++-- .../gen12lp/kernel_tests_gen12lp.inl | 4 +-- .../gen12lp/tgllp/kernel_tests_tgllp.cpp | 3 +- .../hardware_commands_helper_tests.cpp | 28 +++++++++---------- opencl/test/unit_test/kernel/kernel_tests.cpp | 13 +++++---- .../mem_obj/buffer_set_arg_tests.cpp | 4 +-- .../unit_test/mem_obj/image_set_arg_tests.cpp | 16 +++++------ opencl/test/unit_test/mocks/mock_kernel.cpp | 4 +-- opencl/test/unit_test/mocks/mock_kernel.h | 2 +- .../test/unit_test/program/program_tests.cpp | 2 +- .../sampler/sampler_set_arg_tests.cpp | 2 +- 24 files changed, 83 insertions(+), 72 deletions(-) diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index f5e9a6ae42..3ded661a81 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -868,7 +868,7 @@ void CommandQueueHw::enqueueBlocked( } else { continue; } - kernel->getResidency(allSurfaces); + kernel->getResidency(allSurfaces, device->getRootDeviceIndex()); } for (auto &surface : CreateRange(surfaces, surfaceCount)) { allSurfaces.push_back(surface->duplicate()); diff --git a/opencl/source/command_queue/gpgpu_walker.h b/opencl/source/command_queue/gpgpu_walker.h index fcfc72c1f9..e68c8bd684 100644 --- a/opencl/source/command_queue/gpgpu_walker.h +++ b/opencl/source/command_queue/gpgpu_walker.h @@ -101,7 +101,7 @@ class GpgpuWalkerHelper { bool disablePerfMode); static size_t getSizeForWADisableLSQCROPERFforOCL(const Kernel *pKernel); - static size_t getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel); + static size_t getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel, uint32_t rootDeviceIndex); static size_t setGpgpuWalkerThreadData( WALKER_TYPE *walkerCmd, diff --git a/opencl/source/command_queue/gpgpu_walker_base.inl b/opencl/source/command_queue/gpgpu_walker_base.inl index e9e8d1751b..a524c96015 100644 --- a/opencl/source/command_queue/gpgpu_walker_base.inl +++ b/opencl/source/command_queue/gpgpu_walker_base.inl @@ -172,7 +172,7 @@ size_t GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(const K } template -size_t GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel) { +size_t GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel, uint32_t rootDeviceIndex) { return 0u; } diff --git a/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl b/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl index fc8b866896..ddeafd683b 100644 --- a/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl +++ b/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl @@ -141,7 +141,7 @@ void GpgpuWalkerHelper::dispatchScheduler( *ioh, *ssh, scheduler, - scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + scheduler.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, devQueueHw.getDevice().getRootDeviceIndex()), simd, localWorkSizes, offsetInterfaceDescriptorTable, @@ -211,7 +211,7 @@ size_t EnqueueOperation::getSizeRequiredCSKernel(bool reserveProfilin } size += PerformanceCounters::getGpuCommandsSize(commandQueue, reservePerfCounters); size += GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(pKernel); - size += GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(pKernel); + size += GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(pKernel, commandQueue.getDevice().getRootDeviceIndex()); return size; } diff --git a/opencl/source/command_queue/hardware_interface_bdw_plus.inl b/opencl/source/command_queue/hardware_interface_bdw_plus.inl index 1ae6f449ee..58d428bc90 100644 --- a/opencl/source/command_queue/hardware_interface_bdw_plus.inl +++ b/opencl/source/command_queue/hardware_interface_bdw_plus.inl @@ -93,7 +93,7 @@ inline void HardwareInterface::programWalker( ioh, ssh, kernel, - kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, commandQueue.getDevice().getRootDeviceIndex()), simd, localWorkSizes, offsetInterfaceDescriptorTable, diff --git a/opencl/source/command_queue/local_work_size.cpp b/opencl/source/command_queue/local_work_size.cpp index 2d663e4f9d..f422e636b9 100644 --- a/opencl/source/command_queue/local_work_size.cpp +++ b/opencl/source/command_queue/local_work_size.cpp @@ -419,7 +419,7 @@ Vec3 computeWorkgroupSize(const DispatchInfo &dispatchInfo) { const auto &hwInfo = device.getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto isSimulation = device.isSimulation(); - if (kernel->requiresLimitedWorkgroupSize() && hwHelper.isSpecialWorkgroupSizeRequired(hwInfo, isSimulation)) { + if (kernel->requiresLimitedWorkgroupSize(device.getRootDeviceIndex()) && hwHelper.isSpecialWorkgroupSizeRequired(hwInfo, isSimulation)) { setSpecialWorkgroupSize(workGroupSize); } else if (DebugManager.flags.EnableComputeWorkSizeND.get()) { WorkSizeInfo wsInfo(dispatchInfo); diff --git a/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp b/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp index 46f77333dc..ab33b249a8 100644 --- a/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp +++ b/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp @@ -64,7 +64,7 @@ void HardwareInterface::dispatchWorkarounds( using MI_LOAD_REGISTER_IMM = typename TGLLPFamily::MI_LOAD_REGISTER_IMM; using PIPE_CONTROL = typename TGLLPFamily::PIPE_CONTROL; - if (kernel.requiresWaDisableRccRhwoOptimization()) { + if (kernel.requiresWaDisableRccRhwoOptimization(commandQueue.getDevice().getRootDeviceIndex())) { PIPE_CONTROL cmdPipeControl = TGLLPFamily::cmdInitPipeControl; cmdPipeControl.setCommandStreamerStallEnable(true); @@ -80,8 +80,8 @@ void HardwareInterface::dispatchWorkarounds( } template <> -size_t GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel) { - if (pKernel->requiresWaDisableRccRhwoOptimization()) { +size_t GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(const Kernel *pKernel, uint32_t rootDeviceIndex) { + if (pKernel->requiresWaDisableRccRhwoOptimization(rootDeviceIndex)) { return (2 * (sizeof(TGLLPFamily::PIPE_CONTROL) + sizeof(TGLLPFamily::MI_LOAD_REGISTER_IMM))); } return 0u; diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 130262cb37..6e201460fd 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -1142,8 +1142,7 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) { } } -void Kernel::getResidency(std::vector &dst) { - auto rootDeviceIndex = getDevice().getRootDeviceIndex(); +void Kernel::getResidency(std::vector &dst, uint32_t rootDeviceIndex) { if (kernelDeviceInfos[rootDeviceIndex].privateSurface) { GeneralSurface *surface = new GeneralSurface(kernelDeviceInfos[rootDeviceIndex].privateSurface); dst.push_back(surface); @@ -2386,8 +2385,8 @@ void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsF } } -void Kernel::getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const { - if (false == HwHelper::cacheFlushAfterWalkerSupported(getDevice().getHardwareInfo())) { +void Kernel::getAllocationsForCacheFlush(CacheFlushAllocationsVec &out, uint32_t rootDeviceIndex) const { + if (false == HwHelper::cacheFlushAfterWalkerSupported(getHardwareInfo(rootDeviceIndex))) { return; } for (GraphicsAllocation *alloc : this->kernelArgRequiresCacheFlush) { @@ -2398,7 +2397,7 @@ void Kernel::getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const { out.push_back(alloc); } - auto global = getProgram()->getGlobalSurface(getDevice().getRootDeviceIndex()); + auto global = getProgram()->getGlobalSurface(rootDeviceIndex); if (global != nullptr) { out.push_back(global); } @@ -2440,7 +2439,8 @@ bool Kernel::checkIfIsParentKernelAndBlocksUsesPrintf() { uint64_t Kernel::getKernelStartOffset( const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, - const bool isCssUsed) const { + const bool isCssUsed, + uint32_t rootDeviceIndex) const { uint64_t kernelStartOffset = 0; @@ -2453,7 +2453,7 @@ uint64_t Kernel::getKernelStartOffset( kernelStartOffset += getStartOffset(); - auto &hardwareInfo = getDevice().getHardwareInfo(); + auto &hardwareInfo = getHardwareInfo(rootDeviceIndex); auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (isCssUsed && hwHelper.isOffsetToSkipSetFFIDGPWARequired(hardwareInfo)) { @@ -2493,8 +2493,8 @@ uint32_t Kernel::getAdditionalKernelExecInfo() const { return this->additionalKernelExecInfo; } -bool Kernel::requiresWaDisableRccRhwoOptimization() const { - auto &hardwareInfo = getDevice().getHardwareInfo(); +bool Kernel::requiresWaDisableRccRhwoOptimization(uint32_t rootDeviceIndex) const { + auto &hardwareInfo = getHardwareInfo(rootDeviceIndex); auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); if (hwHelper.isWaDisableRccRhwoOptimizationRequired() && isUsingSharedObjArgs()) { @@ -2502,7 +2502,7 @@ bool Kernel::requiresWaDisableRccRhwoOptimization() const { auto clMemObj = static_cast(arg.object); auto memObj = castToObject(clMemObj); if (memObj && memObj->peekSharingHandler()) { - auto allocation = memObj->getGraphicsAllocation(getDevice().getRootDeviceIndex()); + auto allocation = memObj->getGraphicsAllocation(rootDeviceIndex); for (uint32_t handleId = 0u; handleId < allocation->getNumGmms(); handleId++) { if (allocation->getGmm(handleId)->gmmResourceInfo->getResourceFlags()->Info.MediaCompressed) { return true; @@ -2514,4 +2514,7 @@ bool Kernel::requiresWaDisableRccRhwoOptimization() const { return false; } +const HardwareInfo &Kernel::getHardwareInfo(uint32_t rootDeviceIndex) const { + return *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo(); +} } // namespace NEO diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index e2f6d89e07..a96111f077 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -283,7 +283,7 @@ class Kernel : public BaseObject<_cl_kernel> { //residency for kernel surfaces MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver); - MOCKABLE_VIRTUAL void getResidency(std::vector &dst); + MOCKABLE_VIRTUAL void getResidency(std::vector &dst, uint32_t rootDeviceIndex); bool requiresCoherency(); void resetSharedObjectsPatchAddresses(); bool isUsingSharedObjArgs() const { return usingSharedObjArgs; } @@ -379,7 +379,7 @@ class Kernel : public BaseObject<_cl_kernel> { MOCKABLE_VIRTUAL bool requiresCacheFlushCommand(const CommandQueue &commandQueue) const; using CacheFlushAllocationsVec = StackVec; - void getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const; + void getAllocationsForCacheFlush(CacheFlushAllocationsVec &out, uint32_t rootDeviceIndex) const; void setAuxTranslationDirection(AuxTranslationDirection auxTranslationDirection) { this->auxTranslationDirection = auxTranslationDirection; @@ -404,17 +404,18 @@ class Kernel : public BaseObject<_cl_kernel> { uint64_t getKernelStartOffset( const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, - const bool isCssUsed) const; + const bool isCssUsed, + uint32_t rootDeviceIndex) const; bool requiresPerDssBackedBuffer() const; - bool requiresLimitedWorkgroupSize() const; + bool requiresLimitedWorkgroupSize(uint32_t rootDeviceIndex) const; bool isKernelDebugEnabled() const { return debugEnabled; } int32_t setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue); void setAdditionalKernelExecInfo(uint32_t additionalKernelExecInfo); uint32_t getAdditionalKernelExecInfo() const; - MOCKABLE_VIRTUAL bool requiresWaDisableRccRhwoOptimization() const; + MOCKABLE_VIRTUAL bool requiresWaDisableRccRhwoOptimization(uint32_t rootDeviceIndex) const; const ClDeviceVector &getDevices() const { - return deviceVector; + return program->getDevices(); } protected: @@ -507,9 +508,12 @@ class Kernel : public BaseObject<_cl_kernel> { void addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocation *argAllocation); bool allocationForCacheFlush(GraphicsAllocation *argAllocation) const; + const HardwareInfo &getHardwareInfo(uint32_t rootDeviceIndex) const; + const ClDevice &getDevice() const { return *deviceVector[0]; } + const ExecutionEnvironment &executionEnvironment; Program *program; const ClDeviceVector &deviceVector; diff --git a/opencl/source/kernel/kernel_extra.cpp b/opencl/source/kernel/kernel_extra.cpp index 9ed6844598..9712a23310 100644 --- a/opencl/source/kernel/kernel_extra.cpp +++ b/opencl/source/kernel/kernel_extra.cpp @@ -33,7 +33,7 @@ bool Kernel::requiresPerDssBackedBuffer() const { return DebugManager.flags.ForcePerDssBackedBufferProgramming.get(); } -bool Kernel::requiresLimitedWorkgroupSize() const { +bool Kernel::requiresLimitedWorkgroupSize(uint32_t rootDeviceIndex) const { return this->isBuiltIn; } diff --git a/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp b/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp index 87f57c7a24..45469c8e2c 100644 --- a/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp +++ b/opencl/test/unit_test/accelerators/media_image_arg_tests.cpp @@ -93,7 +93,7 @@ HWTEST_F(MediaImageSetArgTest, WhenSettingMediaImageArgThenArgsSetCorrectly) { pSurfaceState->getSurfaceBaseAddress()); std::vector surfaces; - pKernel->getResidency(surfaces); + pKernel->getResidency(surfaces, rootDeviceIndex); EXPECT_EQ(0u, surfaces.size()); } @@ -131,7 +131,7 @@ HWTEST_F(MediaImageSetArgTest, WhenSettingKernelArgImageThenArgsSetCorrectly) { EXPECT_EQ(MEDIA_SURFACE_STATE::PICTURE_STRUCTURE_FRAME_PICTURE, pSurfaceState->getPictureStructure()); std::vector surfaces; - pKernel->getResidency(surfaces); + pKernel->getResidency(surfaces, rootDeviceIndex); for (auto &surface : surfaces) { delete surface; diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp index 587a56b706..510984ecdb 100644 --- a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp @@ -769,7 +769,7 @@ TEST_F(EnqueueSvmTest, givenEnqueueTaskBlockedOnUserEventWhenItIsEnqueuedThenSur auto kernel = clUniquePtr(Kernel::create(program.get(), *program->getKernelInfo("FillBufferBytes"), &retVal)); std::vector allSurfaces; - kernel->getResidency(allSurfaces); + kernel->getResidency(allSurfaces, rootDeviceIndex); EXPECT_EQ(1u, allSurfaces.size()); kernel->setSvmKernelExecInfo(pSvmAlloc); @@ -789,7 +789,7 @@ TEST_F(EnqueueSvmTest, givenEnqueueTaskBlockedOnUserEventWhenItIsEnqueuedThenSur nullptr); EXPECT_EQ(CL_SUCCESS, retVal); - kernel->getResidency(allSurfaces); + kernel->getResidency(allSurfaces, rootDeviceIndex); EXPECT_EQ(3u, allSurfaces.size()); for (auto &surface : allSurfaces) diff --git a/opencl/test/unit_test/gen11/kernel_tests_gen11.cpp b/opencl/test/unit_test/gen11/kernel_tests_gen11.cpp index 4830033d90..01e50274d5 100644 --- a/opencl/test/unit_test/gen11/kernel_tests_gen11.cpp +++ b/opencl/test/unit_test/gen11/kernel_tests_gen11.cpp @@ -21,5 +21,5 @@ GEN11TEST_F(Gen11KernelTest, givenKernelWhenCanTransformImagesIsCalledThenReturn GEN11TEST_F(Gen11KernelTest, GivenKernelWhenNotRunningOnGen12lpThenWaDisableRccRhwoOptimizationIsNotRequired) { MockKernelWithInternals kernel(*pClDevice); - EXPECT_FALSE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization()); + EXPECT_FALSE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization(rootDeviceIndex)); } diff --git a/opencl/test/unit_test/gen12lp/gpgpu_walker_tests_gen12lp.cpp b/opencl/test/unit_test/gen12lp/gpgpu_walker_tests_gen12lp.cpp index be8b9402a4..90b142b441 100644 --- a/opencl/test/unit_test/gen12lp/gpgpu_walker_tests_gen12lp.cpp +++ b/opencl/test/unit_test/gen12lp/gpgpu_walker_tests_gen12lp.cpp @@ -38,7 +38,7 @@ GEN12LPTEST_F(GpgpuWalkerTests, givenMiStoreRegMemWhenAdjustMiStoreRegMemModeThe class MockKernelWithApplicableWa : public MockKernel { public: MockKernelWithApplicableWa(Program *program, KernelInfo &kernelInfo) : MockKernel(program, kernelInfo) {} - bool requiresWaDisableRccRhwoOptimization() const override { + bool requiresWaDisableRccRhwoOptimization(uint32_t rootDeviceIndex) const override { return waApplicable; } bool waApplicable = false; @@ -135,14 +135,14 @@ GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithApplicableWaDisableRccRhwoO using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; pKernel->waApplicable = true; - auto cmdSize = GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(pKernel); + auto cmdSize = GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(pKernel, rootDeviceIndex); size_t expectedSize = 2 * (sizeof(PIPE_CONTROL) + sizeof(MI_LOAD_REGISTER_IMM)); EXPECT_EQ(expectedSize, cmdSize); } GEN12LPTEST_F(HardwareInterfaceTests, GivenKernelWithoutApplicableWaDisableRccRhwoOptimizationWhenCalculatingCommandsSizeThenZeroIsReturned) { pKernel->waApplicable = false; - auto cmdSize = GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(pKernel); + auto cmdSize = GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(pKernel, rootDeviceIndex); EXPECT_EQ(0u, cmdSize); } diff --git a/opencl/test/unit_test/gen12lp/kernel_tests_gen12lp.inl b/opencl/test/unit_test/gen12lp/kernel_tests_gen12lp.inl index edcb96b3ad..84f8e562ac 100644 --- a/opencl/test/unit_test/gen12lp/kernel_tests_gen12lp.inl +++ b/opencl/test/unit_test/gen12lp/kernel_tests_gen12lp.inl @@ -22,7 +22,7 @@ GEN12LPTEST_F(Gen12LpKernelTest, givenKernelWhenCanTransformImagesIsCalledThenRe GEN12LPTEST_F(Gen12LpKernelTest, GivenKernelWhenNotUsingSharedObjArgsThenWaDisableRccRhwoOptimizationIsNotRequired) { MockKernelWithInternals kernel(*pClDevice); - EXPECT_FALSE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization()); + EXPECT_FALSE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization(rootDeviceIndex)); } GEN12LPTEST_F(Gen12LpKernelTest, GivenKernelWhenAtLeastOneArgIsMediaCompressedThenWaDisableRccRhwoOptimizationIsRequired) { @@ -56,5 +56,5 @@ GEN12LPTEST_F(Gen12LpKernelTest, GivenKernelWhenAtLeastOneArgIsMediaCompressedTh cl_mem clMem2 = &bufferMediaCompressed; kernel.mockKernel->setArgBuffer(2, sizeof(cl_mem *), &clMem2); - EXPECT_TRUE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization()); + EXPECT_TRUE(kernel.mockKernel->requiresWaDisableRccRhwoOptimization(rootDeviceIndex)); } diff --git a/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp b/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp index e30d7938d4..6351ddb713 100644 --- a/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp +++ b/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp @@ -28,11 +28,12 @@ TGLLPTEST_F(KernelTgllpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenS hwInfo.platform.usRevId = stepping; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); + auto rootDeviceIndex = device->getRootDeviceIndex(); MockKernelWithInternals mockKernelWithInternals{*device}; mockKernelWithInternals.kernelInfo.patchInfo.threadPayload = &threadPayload; for (auto isCcsUsed : ::testing::Bool()) { - uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartOffset(false, false, isCcsUsed); + uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartOffset(false, false, isCcsUsed, rootDeviceIndex); if (stepping == REVISION_A0 && isCcsUsed) { EXPECT_EQ(defaultKernelStartOffset + additionalOffsetDueToFfid, kernelStartOffset); diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index 04b4f53b93..833664ac77 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -345,7 +345,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenAllocatingIndirectStateRes ioh, ssh, *kernel, - kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), kernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, IDToffset, @@ -400,7 +400,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl ioh, ssh, *mockKernelWithInternal->mockKernel, - mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, @@ -448,7 +448,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelThatIsSchedulerWhen ioh, ssh, *mockKernelWithInternal->mockKernel, - mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, @@ -490,7 +490,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable ioh, ssh, *mockKernelWithInternal->mockKernel, - mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, @@ -568,7 +568,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe ioh, ssh, mockKernel, - mockKernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + mockKernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), modifiedKernelInfo.getMaxSimdSize(), localWorkSizes, IDToffset, @@ -659,7 +659,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenSendingIndirectStateThenBi ioh, ssh, *kernel, - kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), kernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, @@ -819,7 +819,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh ioh, ssh, *pKernel, - pKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + pKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), pKernel->getKernelInfo().getMaxSimdSize(), localWorkSizes, 0, @@ -1016,7 +1016,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd ioh, ssh, *mockKernelWithInternal->mockKernel, - mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed), + mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed, rootDeviceIndex), 8, localWorkSizes, interfaceDescriptorTableOffset, @@ -1196,7 +1196,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnab mockKernelWithInternal->mockProgram->setGlobalSurface(&globalAllocation); Kernel::CacheFlushAllocationsVec allocs; - mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs); + mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs, rootDeviceIndex); EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &globalAllocation)); size_t expectedSize = sizeof(PIPE_CONTROL); @@ -1235,7 +1235,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnab mockKernelWithInternal->mockKernel->svmAllocationsRequireCacheFlush = true; Kernel::CacheFlushAllocationsVec allocs; - mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs); + mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs, rootDeviceIndex); EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &svmAllocation1)); EXPECT_EQ(allocs.end(), std::find(allocs.begin(), allocs.end(), &svmAllocation2)); @@ -1270,7 +1270,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenCacheFlushAfterWalkerEnab mockKernelWithInternal->mockKernel->kernelArgRequiresCacheFlush[0] = &cacheRequiringAllocation; Kernel::CacheFlushAllocationsVec allocs; - mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs); + mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocs, rootDeviceIndex); EXPECT_NE(allocs.end(), std::find(allocs.begin(), allocs.end(), &cacheRequiringAllocation)); size_t expectedSize = sizeof(PIPE_CONTROL); @@ -1306,7 +1306,7 @@ TEST_F(HardwareCommandsTest, givenCacheFlushAfterWalkerEnabledWhenPlatformNotSup hardwareInfo.capabilityTable.supportCacheFlushAfterWalker = false; StackVec allocationsForCacheFlush; - mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocationsForCacheFlush); + mockKernelWithInternal->mockKernel->getAllocationsForCacheFlush(allocationsForCacheFlush, rootDeviceIndex); EXPECT_EQ(0U, allocationsForCacheFlush.size()); } @@ -1324,13 +1324,13 @@ HWTEST_F(KernelCacheFlushTests, givenLocallyUncachedBufferWhenGettingAllocations using CacheFlushAllocationsVec = StackVec; CacheFlushAllocationsVec cacheFlushVec; - kernel->getAllocationsForCacheFlush(cacheFlushVec); + kernel->getAllocationsForCacheFlush(cacheFlushVec, rootDeviceIndex); EXPECT_EQ(0u, cacheFlushVec.size()); auto bufferRegular = clCreateBufferWithPropertiesINTEL(context, nullptr, 0, 1, nullptr, nullptr); kernel->setArg(1, sizeof(bufferRegular), &bufferRegular); - kernel->getAllocationsForCacheFlush(cacheFlushVec); + kernel->getAllocationsForCacheFlush(cacheFlushVec, rootDeviceIndex); size_t expectedCacheFlushVecSize = (hardwareInfo.capabilityTable.supportCacheFlushAfterWalker ? 1u : 0u); EXPECT_EQ(expectedCacheFlushVecSize, cacheFlushVec.size()); diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 28dabfbc6d..42f791ffd8 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -1675,7 +1675,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenExportedFun // check getResidency as well std::vector residencySurfaces; - pKernel->getResidency(residencySurfaces); + pKernel->getResidency(residencySurfaces, rootDeviceIndex); std::unique_ptr mockCsrExecEnv; { CommandStreamReceiverMock csrMock; @@ -1711,7 +1711,7 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenGlobalBuffe EXPECT_TRUE(commandStreamReceiver.isMadeResident(program.buildInfos[pDevice->getRootDeviceIndex()].globalSurface)); std::vector residencySurfaces; - pKernel->getResidency(residencySurfaces); + pKernel->getResidency(residencySurfaces, rootDeviceIndex); std::unique_ptr mockCsrExecEnv; { CommandStreamReceiverMock csrMock; @@ -3059,6 +3059,7 @@ TEST(KernelTest, GivenDifferentValuesWhenSetKernelExecutionTypeIsCalledThenCorre TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsAdded) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto rootDeviceIndex = device->getRootDeviceIndex(); MockKernelWithInternals mockKernel(*device); SPatchThreadPayload threadPayload = {}; @@ -3070,13 +3071,14 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffse auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); mockKernel.mockKernel->setStartOffset(128); - auto offset = mockKernel.mockKernel->getKernelStartOffset(false, true, false); + auto offset = mockKernel.mockKernel->getKernelStartOffset(false, true, false, rootDeviceIndex); EXPECT_EQ(allocationOffset + 256u, offset); device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation()); } TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto rootDeviceIndex = device->getRootDeviceIndex(); MockKernelWithInternals mockKernel(*device); SPatchThreadPayload threadPayload = {}; @@ -3088,13 +3090,14 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGet auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); mockKernel.mockKernel->setStartOffset(128); - auto offset = mockKernel.mockKernel->getKernelStartOffset(true, true, false); + auto offset = mockKernel.mockKernel->getKernelStartOffset(true, true, false, rootDeviceIndex); EXPECT_EQ(allocationOffset + 128u, offset); device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation()); } TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + auto rootDeviceIndex = device->getRootDeviceIndex(); MockKernelWithInternals mockKernel(*device); SPatchThreadPayload threadPayload = {}; @@ -3106,7 +3109,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhe auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch(); mockKernel.mockKernel->setStartOffset(128); - auto offset = mockKernel.mockKernel->getKernelStartOffset(false, false, false); + auto offset = mockKernel.mockKernel->getKernelStartOffset(false, false, false, rootDeviceIndex); EXPECT_EQ(allocationOffset + 128u, offset); device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation()); } diff --git a/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp index 39b01c90eb..b5b4ca0a0a 100644 --- a/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_set_arg_tests.cpp @@ -281,7 +281,7 @@ TEST_F(BufferSetArgTest, WhenSettingKernelArgThenAddressToPatchIsSetCorrectlyAnd EXPECT_EQ(reinterpret_cast(buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddressToPatch()), *pKernelArg); std::vector surfaces; - pKernel->getResidency(surfaces); + pKernel->getResidency(surfaces, rootDeviceIndex); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { @@ -311,7 +311,7 @@ TEST_F(BufferSetArgTest, GivenSvmPointerWhenSettingKernelArgThenAddressToPatchIs EXPECT_EQ(ptrSVM, *pKernelArg); std::vector surfaces; - pKernel->getResidency(surfaces); + pKernel->getResidency(surfaces, rootDeviceIndex); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; diff --git a/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp b/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp index 72f980b23d..f4ec5e95a3 100644 --- a/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp +++ b/opencl/test/unit_test/mem_obj/image_set_arg_tests.cpp @@ -129,7 +129,7 @@ HWTEST_F(ImageSetArgTest, WhenSettingKernelArgImageThenSurfaceBaseAddressIsSetCo EXPECT_EQ(srcAllocation->getGpuAddress(), surfaceAddress); std::vector surfaces; - pKernel->getResidency(surfaces); + pKernel->getResidency(surfaces, rootDeviceIndex); EXPECT_EQ(0u, surfaces.size()); } @@ -339,7 +339,7 @@ HWTEST_F(ImageSetArgTest, givenOffsetedBufferWhenSetKernelArgImageIscalledThenFu EXPECT_EQ(srcAllocation->getGpuAddress(), surfaceAddress); std::vector surfaces; - pKernel->getResidency(surfaces); + pKernel->getResidency(surfaces, rootDeviceIndex); EXPECT_EQ(0u, surfaces.size()); } @@ -382,7 +382,7 @@ HWTEST_F(ImageSetArgTest, WhenSettingKernelArgThenPropertiesAreSetCorrectly) { EXPECT_EQ(0u, surfaceState->getCoherencyType()); std::vector surfaces; - pKernel->getResidency(surfaces); + pKernel->getResidency(surfaces, rootDeviceIndex); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { @@ -454,7 +454,7 @@ HWTEST_F(ImageSetArgTest, Given2dArrayWhenSettingKernelArgThenPropertiesAreSetCo EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha()); std::vector surfaces; - pKernel->getResidency(surfaces); + pKernel->getResidency(surfaces, rootDeviceIndex); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; @@ -502,7 +502,7 @@ HWTEST_F(ImageSetArgTest, Given1dArrayWhenSettingKernelArgThenPropertiesAreSetCo EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha()); std::vector surfaces; - pKernel->getResidency(surfaces); + pKernel->getResidency(surfaces, rootDeviceIndex); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; @@ -846,7 +846,7 @@ HWTEST_F(ImageSetArgTest, GivenImageWithClLuminanceFormatWhenSettingKernelArgThe EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ALPHA, surfaceState->getShaderChannelSelectAlpha()); std::vector surfaces; - pKernel->getResidency(surfaces); + pKernel->getResidency(surfaces, rootDeviceIndex); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { delete surface; @@ -866,7 +866,7 @@ HWTEST_F(ImageSetArgTest, WhenSettingArgThenImageIsReturned) { EXPECT_EQ(memObj, pKernel->getKernelArg(0)); std::vector surfaces; - pKernel->getResidency(surfaces); + pKernel->getResidency(surfaces, rootDeviceIndex); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { @@ -990,7 +990,7 @@ HWTEST_F(ImageMediaBlockSetArgTest, WhenSettingKernelArgImageThenPropertiesAreCo EXPECT_EQ(imageMocs, surfaceState->getMemoryObjectControlState()); std::vector surfaces; - pKernel->getResidency(surfaces); + pKernel->getResidency(surfaces, rootDeviceIndex); EXPECT_EQ(1u, surfaces.size()); for (auto &surface : surfaces) { diff --git a/opencl/test/unit_test/mocks/mock_kernel.cpp b/opencl/test/unit_test/mocks/mock_kernel.cpp index 91b3e2b4db..1185e04e26 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.cpp +++ b/opencl/test/unit_test/mocks/mock_kernel.cpp @@ -49,9 +49,9 @@ void MockKernel::makeResident(CommandStreamReceiver &commandStreamReceiver) { Kernel::makeResident(commandStreamReceiver); } -void MockKernel::getResidency(std::vector &dst) { +void MockKernel::getResidency(std::vector &dst, uint32_t rootDeviceIndex) { getResidencyCalls++; - Kernel::getResidency(dst); + Kernel::getResidency(dst, rootDeviceIndex); } bool MockKernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const { if (DebugManager.flags.EnableCacheFlushAfterWalker.get() != -1) { diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index d21e4dbef8..300fecd3ee 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -223,7 +223,7 @@ class MockKernel : public Kernel { void setUsingSharedArgs(bool usingSharedArgValue) { this->usingSharedObjArgs = usingSharedArgValue; } void makeResident(CommandStreamReceiver &commandStreamReceiver) override; - void getResidency(std::vector &dst) override; + void getResidency(std::vector &dst, uint32_t rootDeviceIndex) override; void takeOwnership() const override { Kernel::takeOwnership(); takeOwnershipCalls++; diff --git a/opencl/test/unit_test/program/program_tests.cpp b/opencl/test/unit_test/program/program_tests.cpp index 41cb6020e3..fa673ad383 100644 --- a/opencl/test/unit_test/program/program_tests.cpp +++ b/opencl/test/unit_test/program/program_tests.cpp @@ -1355,7 +1355,7 @@ HWTEST_F(PatchTokenTests, givenKernelRequiringConstantAllocationWhenMakeResident EXPECT_EQ(0u, pCommandStreamReceiver->residency.size()); std::vector surfaces; - pKernel->getResidency(surfaces); + pKernel->getResidency(surfaces, rootDeviceIndex); EXPECT_EQ(2u, surfaces.size()); for (Surface *surface : surfaces) { diff --git a/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp b/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp index 4c44fa434a..f613d09dac 100644 --- a/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp +++ b/opencl/test/unit_test/sampler/sampler_set_arg_tests.cpp @@ -133,7 +133,7 @@ HWTEST_F(SamplerSetArgTest, WhenSettingKernelArgSamplerThenSamplerStatesAreCorre EXPECT_EQ(SAMPLER_STATE::MIP_MODE_FILTER_NEAREST, samplerState->getMipModeFilter()); std::vector surfaces; - pKernel->getResidency(surfaces); + pKernel->getResidency(surfaces, rootDeviceIndex); EXPECT_EQ(0u, surfaces.size()); }