From 1c1e437d4be77ba9c03b840b7ae38fb2755d0e80 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Tue, 23 Jul 2024 12:14:04 +0000 Subject: [PATCH] refactor: split kernel residency into internal and argument containers Related-To: NEO-11719 Signed-off-by: Zbigniew Zdanowicz --- .../cmdlist/cmdlist_hw_skl_to_tgllp.inl | 8 +++-- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 29 +++++++++------ level_zero/core/source/kernel/kernel.h | 3 +- level_zero/core/source/kernel/kernel_imp.cpp | 36 +++++++++---------- level_zero/core/source/kernel/kernel_imp.h | 11 ++++-- .../test/unit_tests/fixtures/module_fixture.h | 3 +- .../core/test/unit_tests/mocks/mock_cmdlist.h | 4 +-- .../core/test/unit_tests/mocks/mock_kernel.h | 3 +- .../unit_tests/sources/assert/test_assert.cpp | 2 +- .../sources/cmdlist/test_cmdlist_5.cpp | 6 ++-- .../test_cmdlist_append_launch_kernel_1.cpp | 2 +- .../sources/cmdqueue/test_cmdqueue_2.cpp | 2 +- .../unit_tests/sources/kernel/test_kernel.cpp | 21 +++++------ .../unit_tests/sources/module/test_module.cpp | 16 ++++----- .../test_cmdlist_xe2_hpg_core.cpp | 10 +++--- .../xe_hpc_core/test_cmdlist_xe_hpc_core.cpp | 10 +++--- 16 files changed, 93 insertions(+), 73 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl index 140c552c2f..d53003a65d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl @@ -254,8 +254,12 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K appendSignalEventPostWalker(event, nullptr, nullptr, false, false, false); commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation()); - auto &residencyContainer = kernel->getResidencyContainer(); - for (auto resource : residencyContainer) { + auto &argumentsResidencyContainer = kernel->getArgumentsResidencyContainer(); + for (auto resource : argumentsResidencyContainer) { + commandContainer.addToResidencyContainer(resource); + } + auto &internalResidencyContainer = kernel->getInternalResidencyContainer(); + for (auto resource : internalResidencyContainer) { commandContainer.addToResidencyContainer(resource); } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 8358e1b86d..386620cffc 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -225,15 +225,20 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K bool isKernelUsingSystemAllocation = false; if (!launchParams.isBuiltInKernel) { - auto &kernelAllocations = kernel->getResidencyContainer(); - for (auto &allocation : kernelAllocations) { - if (allocation == nullptr) { - continue; + auto verifyKernelUsingSystemAllocations = [&isKernelUsingSystemAllocation](const NEO::ResidencyContainer &kernelResidencyContainer) { + for (const auto &allocation : kernelResidencyContainer) { + if (allocation == nullptr) { + continue; + } + if (allocation->getAllocationType() == NEO::AllocationType::bufferHostMemory) { + isKernelUsingSystemAllocation = true; + } } - if (allocation->getAllocationType() == NEO::AllocationType::bufferHostMemory) { - isKernelUsingSystemAllocation = true; - } - } + }; + + verifyKernelUsingSystemAllocations(kernel->getArgumentsResidencyContainer()); + verifyKernelUsingSystemAllocations(kernel->getInternalResidencyContainer()); + } else { isKernelUsingSystemAllocation = launchParams.isDestinationAllocationInSystemMemory; } @@ -437,9 +442,13 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K // Attach kernel residency to our CommandList residency { commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation()); + auto &internalResidencyContainer = kernel->getInternalResidencyContainer(); + for (auto resource : internalResidencyContainer) { + commandContainer.addToResidencyContainer(resource); + } if (!launchParams.omitAddingKernelResidency) { - auto &residencyContainer = kernel->getResidencyContainer(); - for (auto resource : residencyContainer) { + auto &argumentsResidencyContainer = kernel->getArgumentsResidencyContainer(); + for (auto resource : argumentsResidencyContainer) { commandContainer.addToResidencyContainer(resource); } } diff --git a/level_zero/core/source/kernel/kernel.h b/level_zero/core/source/kernel/kernel.h index f9abf2238d..b5b3271ff8 100644 --- a/level_zero/core/source/kernel/kernel.h +++ b/level_zero/core/source/kernel/kernel.h @@ -148,7 +148,8 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI { virtual const KernelImmutableData *getImmutableData() const = 0; - virtual const std::vector &getResidencyContainer() const = 0; + virtual const std::vector &getArgumentsResidencyContainer() const = 0; + virtual const std::vector &getInternalResidencyContainer() const = 0; virtual UnifiedMemoryControls getUnifiedMemoryControls() const = 0; virtual bool hasIndirectAllocationsAllowed() const = 0; diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index dcc5d5ae5e..e52fa30561 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -586,7 +586,7 @@ ze_result_t KernelImp::setArgImmediate(uint32_t argIndex, size_t argSize, const ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) { const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); if (argVal == nullptr) { - residencyContainer[argIndex] = nullptr; + argumentsResidencyContainer[argIndex] = nullptr; return ZE_RESULT_SUCCESS; } @@ -620,7 +620,7 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle } else { image->copyRedescribedSurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful); } - residencyContainer[argIndex] = image->getAllocation(); + argumentsResidencyContainer[argIndex] = image->getAllocation(); return ZE_RESULT_SUCCESS; } @@ -656,7 +656,7 @@ ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal } this->setKernelArgUncached(argIndex, argIsUncacheable); } - residencyContainer[argIndex] = allocation; + argumentsResidencyContainer[argIndex] = allocation; return ZE_RESULT_SUCCESS; } @@ -727,7 +727,7 @@ ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const voi } if (nullptr == argVal) { - residencyContainer[argIndex] = nullptr; + argumentsResidencyContainer[argIndex] = nullptr; const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as(); uintptr_t nullBufferValue = 0; NEO::patchPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg, nullBufferValue); @@ -774,7 +774,7 @@ ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const voi for (const auto &mappedAllocationData : allocData->virtualReservationData->mappedAllocations) { // Add additional allocations to the residency container if the virtual reservation spans multiple allocations. if (requestedAddress != mappedAllocationData.second->ptr) { - this->residencyContainer.push_back(mappedAllocationData.second->mappedAllocation->allocation); + this->argumentsResidencyContainer.push_back(mappedAllocationData.second->mappedAllocation->allocation); } } } @@ -784,7 +784,7 @@ ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const voi ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void *argVal) { if (argVal == nullptr) { - residencyContainer[argIndex] = nullptr; + argumentsResidencyContainer[argIndex] = nullptr; return ZE_RESULT_SUCCESS; } @@ -824,10 +824,10 @@ ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void image->copySurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful, isMediaBlockImage); } - residencyContainer[argIndex] = image->getAllocation(); + argumentsResidencyContainer[argIndex] = image->getAllocation(); if (image->getImplicitArgsAllocation()) { - this->residencyContainer.push_back(image->getImplicitArgsAllocation()); + this->argumentsResidencyContainer.push_back(image->getImplicitArgsAllocation()); } auto imageInfo = image->getImageInfo(); @@ -1091,13 +1091,13 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { } } - residencyContainer.resize(this->kernelArgHandlers.size(), nullptr); + argumentsResidencyContainer.resize(this->kernelArgHandlers.size(), nullptr); auto &kernelAttributes = kernelDescriptor.kernelAttributes; if ((kernelAttributes.perHwThreadPrivateMemorySize != 0U) && (false == module->shouldAllocatePrivateMemoryPerDispatch())) { this->privateMemoryGraphicsAllocation = allocatePrivateMemoryGraphicsAllocation(); this->patchCrossthreadDataWithPrivateAllocation(this->privateMemoryGraphicsAllocation); - this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation); + this->internalResidencyContainer.push_back(this->privateMemoryGraphicsAllocation); } this->createPrintfBuffer(); @@ -1106,8 +1106,8 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { this->setAssertBuffer(); - residencyContainer.insert(residencyContainer.end(), kernelImmData->getResidencyContainer().begin(), - kernelImmData->getResidencyContainer().end()); + internalResidencyContainer.insert(internalResidencyContainer.end(), kernelImmData->getResidencyContainer().begin(), + kernelImmData->getResidencyContainer().end()); ModuleImp *moduleImp = reinterpret_cast(this->module); const auto indirectDetectionVersion = moduleImp->getTranslationUnit()->programInfo.indirectDetectionVersion; @@ -1138,7 +1138,7 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { } for (auto rtStack : rtDispatchGlobalsInfo->rtStacks) { - this->residencyContainer.push_back(rtStack); + this->internalResidencyContainer.push_back(rtStack); } auto address = rtDispatchGlobalsInfo->rtDispatchGlobalsArray->getGpuAddressToPatch(); @@ -1151,7 +1151,7 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { pImplicitArgs->rtGlobalBufferPtr = address; } - this->residencyContainer.push_back(rtDispatchGlobalsInfo->rtDispatchGlobalsArray); + this->internalResidencyContainer.push_back(rtDispatchGlobalsInfo->rtDispatchGlobalsArray); } this->midThreadPreemptionDisallowedForRayTracingKernels = productHelper.isMidThreadPreemptionDisallowedForRayTracingKernels(); return ZE_RESULT_SUCCESS; @@ -1160,7 +1160,7 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { void KernelImp::createPrintfBuffer() { if (this->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf || pImplicitArgs) { this->printfBuffer = PrintfHandler::createPrintfBuffer(this->module->getDevice()); - this->residencyContainer.push_back(printfBuffer); + this->internalResidencyContainer.push_back(printfBuffer); if (this->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf) { NEO::patchPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.printfSurfaceAddress, @@ -1186,14 +1186,14 @@ bool KernelImp::usesRegionGroupBarrier() const { } void KernelImp::patchSyncBuffer(NEO::GraphicsAllocation *gfxAllocation, size_t bufferOffset) { - this->residencyContainer.push_back(gfxAllocation); + this->internalResidencyContainer.push_back(gfxAllocation); NEO::patchPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.syncBufferAddress, static_cast(ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset))); } void KernelImp::patchRegionGroupBarrier(NEO::GraphicsAllocation *gfxAllocation, size_t bufferOffset) { - this->residencyContainer.push_back(gfxAllocation); + this->internalResidencyContainer.push_back(gfxAllocation); NEO::patchPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.regionGroupBarrierBuffer, @@ -1335,7 +1335,7 @@ void KernelImp::setAssertBuffer() { NEO::patchPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.assertBufferAddress, static_cast(assertHandler->getAssertBuffer()->getGpuAddressToPatch())); - this->residencyContainer.push_back(assertHandler->getAssertBuffer()); + this->internalResidencyContainer.push_back(assertHandler->getAssertBuffer()); if (pImplicitArgs) { pImplicitArgs->assertBufferPtr = static_cast(assertHandler->getAssertBuffer()->getGpuAddressToPatch()); diff --git a/level_zero/core/source/kernel/kernel_imp.h b/level_zero/core/source/kernel/kernel_imp.h index 14fdb48fdc..a6b9abb303 100644 --- a/level_zero/core/source/kernel/kernel_imp.h +++ b/level_zero/core/source/kernel/kernel_imp.h @@ -75,8 +75,12 @@ struct KernelImp : Kernel { const uint8_t *getCrossThreadData() const override { return crossThreadData.get(); } uint32_t getCrossThreadDataSize() const override { return crossThreadDataSize; } - const std::vector &getResidencyContainer() const override { - return residencyContainer; + const std::vector &getArgumentsResidencyContainer() const override { + return argumentsResidencyContainer; + } + + const std::vector &getInternalResidencyContainer() const override { + return internalResidencyContainer; } ze_result_t setArgImmediate(uint32_t argIndex, size_t argSize, const void *argVal); @@ -210,7 +214,8 @@ struct KernelImp : Kernel { typedef ze_result_t (KernelImp::*KernelArgHandler)(uint32_t argIndex, size_t argSize, const void *argVal); std::vector kernelArgInfos; std::vector kernelArgHandlers; - std::vector residencyContainer; + std::vector argumentsResidencyContainer; + std::vector internalResidencyContainer; std::mutex *devicePrintfKernelMutex = nullptr; NEO::GraphicsAllocation *printfBuffer = nullptr; diff --git a/level_zero/core/test/unit_tests/fixtures/module_fixture.h b/level_zero/core/test/unit_tests/fixtures/module_fixture.h index 6bcaa2149c..2879a6bf02 100644 --- a/level_zero/core/test/unit_tests/fixtures/module_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/module_fixture.h @@ -77,10 +77,12 @@ struct ModuleImmutableDataFixture : public DeviceFixture { class MockKernel : public WhiteBox { public: + using KernelImp::argumentsResidencyContainer; using KernelImp::crossThreadData; using KernelImp::crossThreadDataSize; using KernelImp::dynamicStateHeapData; using KernelImp::dynamicStateHeapDataSize; + using KernelImp::internalResidencyContainer; using KernelImp::kernelArgHandlers; using KernelImp::kernelHasIndirectAccess; using KernelImp::kernelImmData; @@ -92,7 +94,6 @@ struct ModuleImmutableDataFixture : public DeviceFixture { using KernelImp::printfBuffer; using KernelImp::privateMemoryGraphicsAllocation; using KernelImp::requiredWorkgroupOrder; - using KernelImp::residencyContainer; using KernelImp::surfaceStateHeapData; using KernelImp::surfaceStateHeapDataSize; using KernelImp::unifiedMemoryControls; diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 194a6ff131..b843f90f09 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -738,7 +738,7 @@ class MockCommandListImmediateHw : public WhiteBox<::L0::CommandListCoreFamilyIm struct CmdListHelper { NEO::GraphicsAllocation *isaAllocation = nullptr; - NEO::ResidencyContainer residencyContainer; + NEO::ResidencyContainer argumentsResidencyContainer; ze_group_count_t threadGroupDimensions; const uint32_t *groupSize = nullptr; uint32_t useOnlyGlobalTimestamp = std::numeric_limits::max(); @@ -760,7 +760,7 @@ class MockCommandListForAppendLaunchKernel : public WhiteBox<::L0::CommandListCo const auto kernel = Kernel::fromHandle(kernelHandle); cmdListHelper.isaAllocation = kernel->getIsaAllocation(); - cmdListHelper.residencyContainer = kernel->getResidencyContainer(); + cmdListHelper.argumentsResidencyContainer = kernel->getArgumentsResidencyContainer(); cmdListHelper.groupSize = kernel->getGroupSize(); cmdListHelper.threadGroupDimensions = threadGroupDimensions; diff --git a/level_zero/core/test/unit_tests/mocks/mock_kernel.h b/level_zero/core/test/unit_tests/mocks/mock_kernel.h index 794d074535..e7d2650e33 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_kernel.h +++ b/level_zero/core/test/unit_tests/mocks/mock_kernel.h @@ -38,12 +38,14 @@ template <> struct WhiteBox<::L0::KernelImp> : public ::L0::KernelImp { using BaseClass = ::L0::KernelImp; using BaseClass::BaseClass; + using ::L0::KernelImp::argumentsResidencyContainer; using ::L0::KernelImp::createPrintfBuffer; using ::L0::KernelImp::crossThreadData; using ::L0::KernelImp::crossThreadDataSize; using ::L0::KernelImp::dynamicStateHeapData; using ::L0::KernelImp::dynamicStateHeapDataSize; using ::L0::KernelImp::groupSize; + using ::L0::KernelImp::internalResidencyContainer; using ::L0::KernelImp::isBindlessOffsetSet; using ::L0::KernelImp::kernelHasIndirectAccess; using ::L0::KernelImp::kernelImmData; @@ -60,7 +62,6 @@ struct WhiteBox<::L0::KernelImp> : public ::L0::KernelImp { using ::L0::KernelImp::pImplicitArgs; using ::L0::KernelImp::printfBuffer; using ::L0::KernelImp::requiredWorkgroupOrder; - using ::L0::KernelImp::residencyContainer; using ::L0::KernelImp::setAssertBuffer; using ::L0::KernelImp::slmArgsTotalSize; using ::L0::KernelImp::suggestGroupSizeCache; diff --git a/level_zero/core/test/unit_tests/sources/assert/test_assert.cpp b/level_zero/core/test/unit_tests/sources/assert/test_assert.cpp index 3e24ff3c10..cdd5c0129c 100644 --- a/level_zero/core/test/unit_tests/sources/assert/test_assert.cpp +++ b/level_zero/core/test/unit_tests/sources/assert/test_assert.cpp @@ -102,7 +102,7 @@ TEST(KernelAssert, GivenKernelWithAssertWhenSettingAssertBufferThenAssertBufferI auto assertBufferAddress = assertHandler->getAssertBuffer()->getGpuAddressToPatch(); EXPECT_TRUE(memcmp(kernel.crossThreadData.get(), &assertBufferAddress, sizeof(assertBufferAddress)) == 0); - EXPECT_TRUE(std::find(kernel.getResidencyContainer().begin(), kernel.getResidencyContainer().end(), assertHandler->getAssertBuffer()) != kernel.getResidencyContainer().end()); + EXPECT_TRUE(std::find(kernel.getInternalResidencyContainer().begin(), kernel.getInternalResidencyContainer().end(), assertHandler->getAssertBuffer()) != kernel.getInternalResidencyContainer().end()); } TEST(KernelAssert, GivenKernelWithAssertAndImplicitArgsWhenInitializingKernelThenImplicitArgsAssertBufferPtrIsSet) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp index a38de33874..7cd365dca4 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp @@ -83,7 +83,7 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTime bool containsDstPtr = false; bool gpuTimeStampAlloc = false; - for (auto &residentGfxAlloc : commandList.cmdListHelper.residencyContainer) { + for (auto &residentGfxAlloc : commandList.cmdListHelper.argumentsResidencyContainer) { if (residentGfxAlloc != nullptr) { if (residentGfxAlloc->getGpuAddress() == reinterpret_cast(alloc)) { @@ -148,7 +148,7 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTime bool containsDstPtr = false; - for (auto &a : commandList.cmdListHelper.residencyContainer) { + for (auto &a : commandList.cmdListHelper.argumentsResidencyContainer) { if (a != nullptr && a->getGpuAddress() == reinterpret_cast(alloc)) { containsDstPtr = true; } @@ -158,7 +158,7 @@ HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTime bool containOffsetPtr = false; - for (auto &a : commandList.cmdListHelper.residencyContainer) { + for (auto &a : commandList.cmdListHelper.argumentsResidencyContainer) { if (a != nullptr && a->getGpuAddress() == reinterpret_cast(offsetAlloc)) { containOffsetPtr = true; } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index efb0bc0c1e..d2e10b2e4b 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -1491,7 +1491,7 @@ HWTEST2_F(CommandListAppendLaunchKernelMockModule, IsAtLeastXeHpCore) { NEO::MockGraphicsAllocation mockAllocation; NEO::GraphicsAllocation *allocation = &mockAllocation; - kernel->residencyContainer.push_back(allocation); + kernel->argumentsResidencyContainer.push_back(allocation); ze_group_count_t groupCount{1, 1, 1}; ze_result_t returnValue; diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp index ecaa1b2a00..3bfa5bdccf 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp @@ -790,7 +790,7 @@ HWTEST2_F(DeviceWithDualStorage, givenCmdListWithAppendedKernelAndUsmTransferAnd size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); - kernel.residencyContainer.push_back(gpuAlloc); + kernel.argumentsResidencyContainer.push_back(gpuAlloc); ze_group_count_t dispatchKernelArguments{1, 1, 1}; CmdListKernelLaunchParams launchParams = {}; diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index 7384917bf6..a851aa4f06 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -58,12 +58,14 @@ template struct WhiteBoxKernelHw : public KernelHw { using BaseClass = KernelHw; using BaseClass::BaseClass; + using ::L0::KernelImp::argumentsResidencyContainer; using ::L0::KernelImp::createPrintfBuffer; using ::L0::KernelImp::crossThreadData; using ::L0::KernelImp::crossThreadDataSize; using ::L0::KernelImp::dynamicStateHeapData; using ::L0::KernelImp::dynamicStateHeapDataSize; using ::L0::KernelImp::groupSize; + using ::L0::KernelImp::internalResidencyContainer; using ::L0::KernelImp::isBindlessOffsetSet; using ::L0::KernelImp::kernelImmData; using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime; @@ -75,7 +77,6 @@ struct WhiteBoxKernelHw : public KernelHw { using ::L0::KernelImp::perThreadDataSizeForWholeThreadGroup; using ::L0::KernelImp::printfBuffer; using ::L0::KernelImp::requiredWorkgroupOrder; - using ::L0::KernelImp::residencyContainer; using ::L0::KernelImp::surfaceStateHeapData; using ::L0::KernelImp::surfaceStateHeapDataSize; using ::L0::KernelImp::unifiedMemoryControls; @@ -940,7 +941,7 @@ TEST_F(KernelImmutableDataTests, givenKernelInitializedWithPrivateMemoryThenCont createKernel(kernelWithPrivateMemory.get()); EXPECT_NE(nullptr, kernelWithPrivateMemory->privateMemoryGraphicsAllocation); - size_t sizeContainerWithPrivateMemory = kernelWithPrivateMemory->getResidencyContainer().size(); + size_t sizeContainerWithPrivateMemory = kernelWithPrivateMemory->getInternalResidencyContainer().size(); perHwThreadPrivateMemorySizeRequested = 0u; std::unique_ptr mockKernelImmDataForModuleWithoutPrivateMemory = std::make_unique(perHwThreadPrivateMemorySizeRequested); @@ -958,7 +959,7 @@ TEST_F(KernelImmutableDataTests, givenKernelInitializedWithPrivateMemoryThenCont createKernel(kernelWithoutPrivateMemory.get()); EXPECT_EQ(nullptr, kernelWithoutPrivateMemory->privateMemoryGraphicsAllocation); - size_t sizeContainerWithoutPrivateMemory = kernelWithoutPrivateMemory->getResidencyContainer().size(); + size_t sizeContainerWithoutPrivateMemory = kernelWithoutPrivateMemory->getInternalResidencyContainer().size(); EXPECT_EQ(sizeContainerWithoutPrivateMemory + 1u, sizeContainerWithPrivateMemory); } @@ -1043,7 +1044,7 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized ASSERT_NE(nullptr, implicitArgs); EXPECT_EQ_VAL(implicitArgs->rtGlobalBufferPtr, rtDispatchGlobals->rtDispatchGlobalsArray->getGpuAddressToPatch()); - auto &residencyContainer = kernel->getResidencyContainer(); + auto &residencyContainer = kernel->getInternalResidencyContainer(); auto found = std::find(residencyContainer.begin(), residencyContainer.end(), rtMemoryBackedBuffer); EXPECT_EQ(residencyContainer.end(), found); @@ -2221,7 +2222,7 @@ TEST_F(KernelImpPatchBindlessTest, GivenKernelImpWhenPatchBindlessOffsetCalledTh EXPECT_EQ(ssPtr, expectedSsInHeap.ssPtr); EXPECT_TRUE(memcmp(const_cast(patchLocation), &patchValue, sizeof(patchValue)) == 0); - EXPECT_FALSE(std::find(kernel.getResidencyContainer().begin(), kernel.getResidencyContainer().end(), expectedSsInHeap.heapAllocation) != kernel.getResidencyContainer().end()); + EXPECT_FALSE(std::find(kernel.getArgumentsResidencyContainer().begin(), kernel.getArgumentsResidencyContainer().end(), expectedSsInHeap.heapAllocation) != kernel.getArgumentsResidencyContainer().end()); neoDevice->decRefInternal(); } @@ -2829,7 +2830,7 @@ HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgImageThenCopySurfac EXPECT_EQ(imageHW->passedSurfaceStateOffset, 0u); EXPECT_TRUE(kernel->isBindlessOffsetSet[3]); EXPECT_FALSE(kernel->usingSurfaceStateHeap[3]); - EXPECT_EQ(0, std::count(kernel->residencyContainer.begin(), kernel->residencyContainer.end(), expectedSsInHeap.heapAllocation)); + EXPECT_EQ(0, std::count(kernel->argumentsResidencyContainer.begin(), kernel->argumentsResidencyContainer.end(), expectedSsInHeap.heapAllocation)); } HWTEST2_F(SetKernelArg, givenNoGlobalAllocatorAndBindlessKernelWhenSetArgImageThenBindlessOffsetIsNotSetAndSshIsUsed, ImageSupport) { @@ -2951,7 +2952,7 @@ HWTEST2_F(SetKernelArg, givenImageBindlessKernelAndGlobalBindlessHelperWhenSetAr EXPECT_EQ(imageHW->passedRedescribedSurfaceStateOffset, 0u); EXPECT_TRUE(kernel->isBindlessOffsetSet[3]); EXPECT_FALSE(kernel->usingSurfaceStateHeap[3]); - EXPECT_EQ(0, std::count(kernel->residencyContainer.begin(), kernel->residencyContainer.end(), expectedSsInHeap.heapAllocation)); + EXPECT_EQ(0, std::count(kernel->argumentsResidencyContainer.begin(), kernel->argumentsResidencyContainer.end(), expectedSsInHeap.heapAllocation)); } HWTEST2_F(SetKernelArg, givenGlobalBindlessHelperAndImageViewWhenAllocatingBindlessSlotThenViewHasDifferentSlotThanParentImage, ImageSupport) { @@ -3091,7 +3092,7 @@ HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgRedescribedImageCal mockKernel.surfaceStateHeapData = std::make_unique(surfaceStateSize); mockKernel.descriptor.initBindlessOffsetToSurfaceState(); - mockKernel.residencyContainer.resize(1); + mockKernel.argumentsResidencyContainer.resize(1); mockKernel.isBindlessOffsetSet.resize(1, 0); mockKernel.usingSurfaceStateHeap.resize(1, false); @@ -3456,8 +3457,8 @@ TEST_F(PrintfTest, WhenCreatingPrintfBufferThenAllocationAddedToResidencyContain auto printfBufferAllocation = mockKernel.getPrintfBufferAllocation(); EXPECT_NE(nullptr, printfBufferAllocation); - EXPECT_NE(0u, mockKernel.residencyContainer.size()); - EXPECT_EQ(mockKernel.residencyContainer[mockKernel.residencyContainer.size() - 1], printfBufferAllocation); + EXPECT_NE(0u, mockKernel.internalResidencyContainer.size()); + EXPECT_EQ(mockKernel.internalResidencyContainer[mockKernel.internalResidencyContainer.size() - 1], printfBufferAllocation); } TEST_F(PrintfTest, WhenCreatingPrintfBufferThenCrossThreadDataIsPatched) { diff --git a/level_zero/core/test/unit_tests/sources/module/test_module.cpp b/level_zero/core/test/unit_tests/sources/module/test_module.cpp index 7e859ddaf1..5ccf2a4652 100644 --- a/level_zero/core/test/unit_tests/sources/module/test_module.cpp +++ b/level_zero/core/test/unit_tests/sources/module/test_module.cpp @@ -2597,7 +2597,7 @@ HWTEST_F(DeviceModuleSetArgBufferTest, auto argBufferValue = *reinterpret_cast(const_cast(argBufferPtr)); EXPECT_EQ(argBufferValue, reinterpret_cast(validBufferPtr)); - for (auto alloc : kernel->getResidencyContainer()) { + for (auto alloc : kernel->getArgumentsResidencyContainer()) { if (alloc && alloc->getGpuAddress() == reinterpret_cast(validBufferPtr)) { EXPECT_EQ(rootDeviceIndex, alloc->getRootDeviceIndex()); } @@ -2677,7 +2677,7 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest, bool phys1Resident = false; bool phys2Resident = false; - for (auto alloc : kernel->getResidencyContainer()) { + for (auto alloc : kernel->getArgumentsResidencyContainer()) { if (alloc && alloc->getGpuAddress() == reinterpret_cast(ptr)) { phys1Resident = true; } @@ -2747,7 +2747,7 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest, bool phys1Resident = false; bool phys2Resident = false; - for (auto alloc : kernel->getResidencyContainer()) { + for (auto alloc : kernel->getArgumentsResidencyContainer()) { if (alloc && alloc->getGpuAddress() == reinterpret_cast(ptr)) { phys1Resident = true; } @@ -2810,7 +2810,7 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest, kernel->setArgBuffer(0, sizeof(ptr), &ptr); bool phys1Resident = false; - for (auto alloc : kernel->getResidencyContainer()) { + for (auto alloc : kernel->getArgumentsResidencyContainer()) { if (alloc && alloc->getGpuAddress() == reinterpret_cast(ptr)) { phys1Resident = true; } @@ -2884,7 +2884,7 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest, bool phys1Resident = false; bool phys2Resident = false; - for (auto alloc : kernel->getResidencyContainer()) { + for (auto alloc : kernel->getArgumentsResidencyContainer()) { if (alloc && alloc->getGpuAddress() == reinterpret_cast(ptr)) { phys1Resident = true; } @@ -2933,7 +2933,7 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest, L0::KernelImp *kernel = reinterpret_cast(Kernel::fromHandle(kernelHandle)); kernel->setArgBuffer(0, sizeof(ptr), &ptr); - for (auto alloc : kernel->getResidencyContainer()) { + for (auto alloc : kernel->getArgumentsResidencyContainer()) { if (alloc && alloc->getGpuAddress() == reinterpret_cast(ptr)) { EXPECT_EQ(rootDeviceIndex, alloc->getRootDeviceIndex()); } @@ -3684,11 +3684,9 @@ HWTEST_F(PrintfModuleTest, GivenModuleWithPrintfWhenKernelIsCreatedThenPrintfAll kernelDesc.pKernelName = "test"; kernel->initialize(&kernelDesc); - auto &container = kernel->residencyContainer; + auto &container = kernel->internalResidencyContainer; auto printfPos = std::find(container.begin(), container.end(), kernel->getPrintfBufferAllocation()); EXPECT_NE(container.end(), printfPos); - bool correctPos = printfPos >= container.begin() + kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs.size(); - EXPECT_TRUE(correctPos); } TEST(BuildOptions, givenNoSrcOptionNameInSrcNamesWhenMovingBuildOptionsThenFalseIsReturned) { diff --git a/level_zero/core/test/unit_tests/xe2_hpg_core/test_cmdlist_xe2_hpg_core.cpp b/level_zero/core/test/unit_tests/xe2_hpg_core/test_cmdlist_xe2_hpg_core.cpp index 7a6e981683..a6ec24aca0 100644 --- a/level_zero/core/test/unit_tests/xe2_hpg_core/test_cmdlist_xe2_hpg_core.cpp +++ b/level_zero/core/test/unit_tests/xe2_hpg_core/test_cmdlist_xe2_hpg_core.cpp @@ -54,7 +54,7 @@ HWTEST2_F(CommandListAppendLaunchKernelXe2HpgCore, givenAppendKernelWhenKernelNo ASSERT_NE(nullptr, allocData); auto kernelAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); ASSERT_NE(nullptr, kernelAllocation); - kernel.residencyContainer.push_back(kernelAllocation); + kernel.argumentsResidencyContainer.push_back(kernelAllocation); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; @@ -116,7 +116,7 @@ HWTEST2_F(CommandListAppendLaunchKernelXe2HpgCore, ASSERT_NE(nullptr, allocData); auto kernelAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); ASSERT_NE(nullptr, kernelAllocation); - kernel.residencyContainer.push_back(kernelAllocation); + kernel.argumentsResidencyContainer.push_back(kernelAllocation); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; @@ -230,7 +230,7 @@ HWTEST2_F(CommandListAppendLaunchKernelXe2HpgCore, ASSERT_NE(nullptr, allocData); auto kernelAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); ASSERT_NE(nullptr, kernelAllocation); - kernel.residencyContainer.push_back(kernelAllocation); + kernel.argumentsResidencyContainer.push_back(kernelAllocation); kernel.unifiedMemoryControls.indirectHostAllocationsAllowed = true; @@ -296,7 +296,7 @@ HWTEST2_F(CommandListAppendLaunchKernelXe2HpgCore, ASSERT_NE(nullptr, allocData); auto kernelAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); ASSERT_NE(nullptr, kernelAllocation); - kernel.residencyContainer.push_back(kernelAllocation); + kernel.argumentsResidencyContainer.push_back(kernelAllocation); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; @@ -358,7 +358,7 @@ HWTEST2_F(CommandListAppendLaunchKernelXe2HpgCore, ASSERT_NE(nullptr, allocData); auto kernelAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); ASSERT_NE(nullptr, kernelAllocation); - kernel.residencyContainer.push_back(kernelAllocation); + kernel.argumentsResidencyContainer.push_back(kernelAllocation); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp index f408438d1c..4ba16bd347 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp @@ -756,7 +756,7 @@ HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, ASSERT_NE(nullptr, allocData); auto kernelAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); ASSERT_NE(nullptr, kernelAllocation); - kernel.residencyContainer.push_back(kernelAllocation); + kernel.argumentsResidencyContainer.push_back(kernelAllocation); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; @@ -824,7 +824,7 @@ HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, ASSERT_NE(nullptr, allocData); auto kernelAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); ASSERT_NE(nullptr, kernelAllocation); - kernel.residencyContainer.push_back(kernelAllocation); + kernel.argumentsResidencyContainer.push_back(kernelAllocation); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; @@ -950,7 +950,7 @@ HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, ASSERT_NE(nullptr, allocData); auto kernelAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); ASSERT_NE(nullptr, kernelAllocation); - kernel.residencyContainer.push_back(kernelAllocation); + kernel.argumentsResidencyContainer.push_back(kernelAllocation); kernel.unifiedMemoryControls.indirectHostAllocationsAllowed = true; @@ -1022,7 +1022,7 @@ HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, ASSERT_NE(nullptr, allocData); auto kernelAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); ASSERT_NE(nullptr, kernelAllocation); - kernel.residencyContainer.push_back(kernelAllocation); + kernel.argumentsResidencyContainer.push_back(kernelAllocation); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; @@ -1090,7 +1090,7 @@ HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, ASSERT_NE(nullptr, allocData); auto kernelAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); ASSERT_NE(nullptr, kernelAllocation); - kernel.residencyContainer.push_back(kernelAllocation); + kernel.argumentsResidencyContainer.push_back(kernelAllocation); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1;