diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 4d2108e111..bcebaa9ff6 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -127,13 +127,12 @@ inline void patch(const SrcT &src, void *dst, CrossThreadDataOffset dstOffsetByt } } -void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg) { +void Kernel::patchWithImplicitSurface(uint64_t ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg) { if ((nullptr != crossThreadData) && isValidOffset(arg.stateless)) { auto pp = ptrOffset(crossThreadData, arg.stateless); - uintptr_t addressToPatch = reinterpret_cast(ptrToPatchInCrossThreadData); - patchWithRequiredSize(pp, arg.pointerSize, addressToPatch); + patchWithRequiredSize(pp, arg.pointerSize, ptrToPatchInCrossThreadData); if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { - PatchInfoData patchInfoData(addressToPatch, 0u, PatchInfoAllocationType::KernelArg, reinterpret_cast(crossThreadData), arg.stateless, PatchInfoAllocationType::IndirectObjectHeap, arg.pointerSize); + PatchInfoData patchInfoData(ptrToPatchInCrossThreadData, 0u, PatchInfoAllocationType::KernelArg, reinterpret_cast(crossThreadData), arg.stateless, PatchInfoAllocationType::IndirectObjectHeap, arg.pointerSize); this->patchInfoDataList.push_back(patchInfoData); } } @@ -227,18 +226,18 @@ cl_int Kernel::initialize() { if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) { DEBUG_BREAK_IF(program->getConstantSurface(rootDeviceIndex) == nullptr); - uintptr_t constMemory = isBuiltIn ? (uintptr_t)program->getConstantSurface(rootDeviceIndex)->getUnderlyingBuffer() : (uintptr_t)program->getConstantSurface(rootDeviceIndex)->getGpuAddressToPatch(); + uint64_t constMemory = isBuiltIn ? castToUint64(program->getConstantSurface(rootDeviceIndex)->getUnderlyingBuffer()) : program->getConstantSurface(rootDeviceIndex)->getGpuAddressToPatch(); const auto &arg = kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress; - patchWithImplicitSurface(reinterpret_cast(constMemory), *program->getConstantSurface(rootDeviceIndex), arg); + patchWithImplicitSurface(constMemory, *program->getConstantSurface(rootDeviceIndex), arg); } if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless)) { DEBUG_BREAK_IF(program->getGlobalSurface(rootDeviceIndex) == nullptr); - uintptr_t globalMemory = isBuiltIn ? (uintptr_t)program->getGlobalSurface(rootDeviceIndex)->getUnderlyingBuffer() : (uintptr_t)program->getGlobalSurface(rootDeviceIndex)->getGpuAddressToPatch(); + uint64_t globalMemory = isBuiltIn ? castToUint64(program->getGlobalSurface(rootDeviceIndex)->getUnderlyingBuffer()) : program->getGlobalSurface(rootDeviceIndex)->getGpuAddressToPatch(); const auto &arg = kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress; - patchWithImplicitSurface(reinterpret_cast(globalMemory), *program->getGlobalSurface(rootDeviceIndex), arg); + patchWithImplicitSurface(globalMemory, *program->getGlobalSurface(rootDeviceIndex), arg); } // Patch Surface State Heap @@ -364,7 +363,7 @@ cl_int Kernel::patchPrivateSurface() { } const auto &privateMemoryAddress = kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress; - patchWithImplicitSurface(reinterpret_cast(privateSurface->getGpuAddressToPatch()), *privateSurface, privateMemoryAddress); + patchWithImplicitSurface(privateSurface->getGpuAddressToPatch(), *privateSurface, privateMemoryAddress); } return CL_SUCCESS; } diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 0aa9d4b44a..f4905a757c 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -465,7 +465,7 @@ class Kernel : public ReferenceTrackedObject { void *patchBufferOffset(const ArgDescPointer &argAsPtr, void *svmPtr, GraphicsAllocation *svmAlloc); - void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg); + void patchWithImplicitSurface(uint64_t ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg); void provideInitializationHints(); diff --git a/opencl/source/mem_obj/buffer.cpp b/opencl/source/mem_obj/buffer.cpp index 92ed049978..67968df38d 100644 --- a/opencl/source/mem_obj/buffer.cpp +++ b/opencl/source/mem_obj/buffer.cpp @@ -470,7 +470,7 @@ Buffer *Buffer::create(Context *context, auto allocation = allocationInfo.memory; auto memoryStorage = allocation->getUnderlyingBuffer(); if (context->getRootDeviceIndices().size() > 1) { - multiGraphicsAllocation.setMultiStorage(true); + multiGraphicsAllocation.setMultiStorage(!MemoryPoolHelper::isSystemMemoryPool(allocation->getMemoryPool())); } auto pBuffer = createBufferHw(context, @@ -669,7 +669,7 @@ Buffer *Buffer::createSubBuffer(cl_mem_flags flags, uint64_t Buffer::setArgStateless(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex, bool set32BitAddressing) { // Subbuffers have offset that graphicsAllocation is not aware of auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex); - uintptr_t addressToPatch = ((set32BitAddressing) ? static_cast(graphicsAllocation->getGpuAddressToPatch()) : static_cast(graphicsAllocation->getGpuAddress())) + this->offset; + auto addressToPatch = ((set32BitAddressing) ? graphicsAllocation->getGpuAddressToPatch() : graphicsAllocation->getGpuAddress()) + this->offset; DEBUG_BREAK_IF(!(graphicsAllocation->isLocked() || (addressToPatch != 0) || (graphicsAllocation->getGpuBaseAddress() != 0) || (this->getCpuAddress() == nullptr && graphicsAllocation->peekSharedHandle()))); diff --git a/opencl/source/mem_obj/pipe.cpp b/opencl/source/mem_obj/pipe.cpp index 4facad179c..a27f1eafba 100644 --- a/opencl/source/mem_obj/pipe.cpp +++ b/opencl/source/mem_obj/pipe.cpp @@ -126,7 +126,7 @@ cl_int Pipe::getPipeInfo(cl_image_info paramName, } void Pipe::setPipeArg(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex) { - patchWithRequiredSize(memory, patchSize, static_cast(multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch())); + patchWithRequiredSize(memory, patchSize, multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch()); } Pipe::~Pipe() = default; diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index a040a91be1..a664795eac 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -1721,11 +1721,18 @@ TEST(CommandQueue, givenBufferWhenMultiStorageIsNotSetThenDontRequireMigrations) alignedFree(dstPtr); } -TEST(CommandQueue, givenBuffersInLocalMemoryWhenMultiGraphicsAllocationsRequireMigrationsThenMigrateTheAllocations) { +using MultiRootDeviceCommandQueueTest = ::testing::Test; +HWTEST2_F(MultiRootDeviceCommandQueueTest, givenBuffersInLocalMemoryWhenMultiGraphicsAllocationsRequireMigrationsThenMigrateTheAllocations, IsAtLeastGen12lp) { MockDefaultContext context{true}; ASSERT_TRUE(context.getNumDevices() > 1); ASSERT_TRUE(context.getRootDeviceIndices().size() > 1); + auto memoryManager = static_cast(context.getMemoryManager()); + for (auto &rootDeviceIndex : context.getRootDeviceIndices()) { + + memoryManager->localMemorySupported[rootDeviceIndex] = true; + } + auto sourceRootDeviceIndex = context.getDevice(0)->getRootDeviceIndex(); EXPECT_EQ(0u, sourceRootDeviceIndex); diff --git a/opencl/test/unit_test/command_stream/cl_command_stream_receiver_tests.cpp b/opencl/test/unit_test/command_stream/cl_command_stream_receiver_tests.cpp index 1a8332a875..e78034c6b5 100644 --- a/opencl/test/unit_test/command_stream/cl_command_stream_receiver_tests.cpp +++ b/opencl/test/unit_test/command_stream/cl_command_stream_receiver_tests.cpp @@ -80,6 +80,8 @@ using CommandStreamReceiverMultiRootDeviceTest = MultiRootDeviceFixture; TEST_F(CommandStreamReceiverMultiRootDeviceTest, WhenCreatingCommandStreamGraphicsAllocationsThenTheyHaveCorrectRootDeviceIndex) { auto commandStreamReceiver = &device1->getGpgpuCommandStreamReceiver(); + mockMemoryManager->localMemorySupported[1] = false; + ASSERT_NE(nullptr, commandStreamReceiver); EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getRootDeviceIndex()); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp index 50e2cf1175..f1011c5b54 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp @@ -938,6 +938,10 @@ struct BcsCrossDeviceMigrationTests : public ::testing::Test { cl_device_id devices[] = {device1, device2}; context = std::make_unique(ClDeviceVector(devices, 2), false); + auto memoryManager = static_cast(context->getMemoryManager()); + for (auto &rootDeviceIndex : context->getRootDeviceIndices()) { + memoryManager->localMemorySupported[rootDeviceIndex] = true; + } } void TearDown() override { diff --git a/opencl/test/unit_test/fixtures/multi_root_device_fixture.h b/opencl/test/unit_test/fixtures/multi_root_device_fixture.h index 5069c726fb..17f0073240 100644 --- a/opencl/test/unit_test/fixtures/multi_root_device_fixture.h +++ b/opencl/test/unit_test/fixtures/multi_root_device_fixture.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2022 Intel Corporation + * Copyright (C) 2019-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -31,6 +31,9 @@ class MultiRootDeviceFixture : public ::testing::Test { mockMemoryManager = static_cast(device1->getMemoryManager()); ASSERT_EQ(mockMemoryManager, device1->getMemoryManager()); + for (auto &rootDeviceIndex : context->getRootDeviceIndices()) { + mockMemoryManager->localMemorySupported[rootDeviceIndex] = true; + } } const uint32_t expectedRootDeviceIndex = 1; @@ -52,6 +55,9 @@ class MultiRootDeviceWithSubDevicesFixture : public ::testing::Test { context.reset(new MockContext(ClDeviceVector(devices, 3), false)); mockMemoryManager = static_cast(device1->getMemoryManager()); ASSERT_EQ(mockMemoryManager, device1->getMemoryManager()); + for (auto &rootDeviceIndex : context->getRootDeviceIndices()) { + mockMemoryManager->localMemorySupported[rootDeviceIndex] = true; + } } const uint32_t expectedRootDeviceIndex = 1; diff --git a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp index 0bdfe1a0c4..ca4988d633 100644 --- a/opencl/test/unit_test/kernel/clone_kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/clone_kernel_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -185,7 +185,7 @@ TEST_F(CloneKernelTest, givenArgLocalWhenCloningKernelThenKernelInfoIsCorrect) { } TEST_F(CloneKernelTest, givenArgBufferWhenCloningKernelThenKernelInfoIsCorrect) { - pKernelInfo->addArgBuffer(0, 0x20, sizeof(void *)); + pKernelInfo->addArgBuffer(0, 0x20, sizeof(uint64_t)); auto buffer = clUniquePtr(Buffer::create(context.get(), 0, MemoryConstants::pageSize, nullptr, retVal)); cl_mem memObj = buffer.get(); @@ -220,14 +220,14 @@ TEST_F(CloneKernelTest, givenArgBufferWhenCloningKernelThenKernelInfoIsCorrect) EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); - auto pKernelArg = (cl_mem *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + - pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as().stateless); - EXPECT_EQ(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), reinterpret_cast(*pKernelArg)); + auto pKernelArg = reinterpret_cast(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + + pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as().stateless); + EXPECT_EQ(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), *pKernelArg); } } TEST_F(CloneKernelTest, givenArgPipeWhenCloningKernelThenKernelInfoIsCorrect) { - pKernelInfo->addArgPipe(0, 0x20, sizeof(void *)); + pKernelInfo->addArgPipe(0, 0x20, sizeof(uint64_t)); auto pipe = clUniquePtr(Pipe::create(context.get(), 0, 1, 20, nullptr, retVal)); EXPECT_EQ(CL_SUCCESS, retVal); @@ -260,9 +260,9 @@ TEST_F(CloneKernelTest, givenArgPipeWhenCloningKernelThenKernelInfoIsCorrect) { EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).isPatched); EXPECT_EQ(pSourceKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId, pClonedKernel[rootDeviceIndex]->getKernelArgInfo(0).allocId); - auto pKernelArg = (cl_mem *)(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + - pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as().stateless); - EXPECT_EQ(pipe->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), reinterpret_cast(*pKernelArg)); + auto pKernelArg = reinterpret_cast(pClonedKernel[rootDeviceIndex]->getCrossThreadData() + + pClonedKernel[rootDeviceIndex]->getKernelInfo().getArgDescriptorAt(0).as().stateless); + EXPECT_EQ(pipe->getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch(), *pKernelArg); } TEST_F(CloneKernelTest, givenArgImageWhenCloningKernelThenKernelInfoIsCorrect) { diff --git a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp index db660fda36..790f9e0f67 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_svm_tests.cpp @@ -284,7 +284,7 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) { RENDER_SURFACE_STATE *surfState = reinterpret_cast(pKernel->getSurfaceStateHeap()); memset(surfState, 0, rendSurfSize); - pKernel->patchWithImplicitSurface(ptrToPatch, svmAlloc, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress); + pKernel->patchWithImplicitSurface(castToUint64(ptrToPatch), svmAlloc, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress); // verify cross thread data was properly patched EXPECT_EQ(ptrToPatch, *reinterpret_cast(pKernel->getCrossThreadData())); @@ -305,7 +305,7 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) { // when cross thread and ssh data is not available then should not do anything pKernel->setCrossThreadData(nullptr, 0); pKernel->setSshLocal(nullptr, 0); - pKernel->patchWithImplicitSurface(ptrToPatch, svmAlloc, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress); + pKernel->patchWithImplicitSurface(castToUint64(ptrToPatch), svmAlloc, pKernelInfo->kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress); } } diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 7dbd5de843..d66c22331b 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -2813,7 +2813,7 @@ TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledWhenPatchWithImplicitS kernel.kernelInfo.addArgBuffer(0, 0, sizeof(void *)); uint64_t crossThreadData = 0; EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size()); - kernel.mockKernel->patchWithImplicitSurface(&crossThreadData, mockAllocation, kernel.kernelInfo.argAsPtr(0)); + kernel.mockKernel->patchWithImplicitSurface(castToUint64(&crossThreadData), mockAllocation, kernel.kernelInfo.argAsPtr(0)); EXPECT_EQ(1u, kernel.mockKernel->getPatchInfoDataList().size()); } @@ -2823,7 +2823,7 @@ TEST(KernelTest, givenKernelWithPatchInfoCollecitonEnabledAndArgumentWithInvalid MockGraphicsAllocation mockAllocation; kernel.kernelInfo.addArgBuffer(0, undefined, sizeof(void *)); uint64_t crossThreadData = 0; - kernel.mockKernel->patchWithImplicitSurface(&crossThreadData, mockAllocation, kernel.kernelInfo.argAsPtr(0)); + kernel.mockKernel->patchWithImplicitSurface(castToUint64(&crossThreadData), mockAllocation, kernel.kernelInfo.argAsPtr(0)); EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size()); } @@ -2837,7 +2837,7 @@ TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledAndValidArgumentWhenPa kernel.kernelInfo.addArgBuffer(0, 0, sizeof(void *)); uint64_t crossThreadData = 0; EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size()); - kernel.mockKernel->patchWithImplicitSurface(&crossThreadData, mockAllocation, kernel.kernelInfo.argAsPtr(0)); + kernel.mockKernel->patchWithImplicitSurface(castToUint64(&crossThreadData), mockAllocation, kernel.kernelInfo.argAsPtr(0)); EXPECT_EQ(1u, kernel.mockKernel->getPatchInfoDataList().size()); } @@ -2848,7 +2848,7 @@ TEST(KernelTest, givenKernelWithPatchInfoCollectionDisabledWhenPatchWithImplicit kernel.kernelInfo.addArgBuffer(0, 0, sizeof(void *)); uint64_t crossThreadData = 0; EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size()); - kernel.mockKernel->patchWithImplicitSurface(&crossThreadData, mockAllocation, kernel.kernelInfo.argAsPtr(0)); + kernel.mockKernel->patchWithImplicitSurface(castToUint64(&crossThreadData), mockAllocation, kernel.kernelInfo.argAsPtr(0)); EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size()); } diff --git a/opencl/test/unit_test/mem_obj/buffer_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_tests.cpp index 0b8a16bdf2..061fdda830 100644 --- a/opencl/test/unit_test/mem_obj/buffer_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_tests.cpp @@ -2164,16 +2164,29 @@ TEST_F(MultiRootDeviceBufferTest, WhenBufferIsCreatedThenBufferGraphicsAllocatio EXPECT_EQ(expectedRootDeviceIndex, graphicsAllocation->getRootDeviceIndex()); } -TEST_F(MultiRootDeviceBufferTest, WhenBufferIsCreatedThenBufferMultiGraphicsAllocationIsCreatedInSystemMemoryPool) { +HWTEST2_F(MultiRootDeviceBufferTest, WhenBufferIsCreatedThenBufferMultiGraphicsAllocationIsCreatedInLocalMemoryPool, IsAtLeastGen12lp) { cl_int retVal = 0; std::unique_ptr buffer1(Buffer::create(context.get(), 0, MemoryConstants::pageSize, nullptr, retVal)); - EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(buffer1->getMultiGraphicsAllocation().getGraphicsAllocation(1u)->getMemoryPool())); - EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(buffer1->getMultiGraphicsAllocation().getGraphicsAllocation(2u)->getMemoryPool())); + EXPECT_FALSE(MemoryPoolHelper::isSystemMemoryPool(buffer1->getMultiGraphicsAllocation().getGraphicsAllocation(1u)->getMemoryPool())); + EXPECT_FALSE(MemoryPoolHelper::isSystemMemoryPool(buffer1->getMultiGraphicsAllocation().getGraphicsAllocation(2u)->getMemoryPool())); + EXPECT_TRUE(buffer1->getMultiGraphicsAllocation().requiresMigrations()); } -TEST(MultiRootDeviceBufferTest2, WhenBufferIsCreatedThenSecondAndSubsequentAllocationsAreCreatedFromExisitingStorage) { +HWTEST2_F(MultiRootDeviceBufferTest, givenDisableLocalMemoryWhenBufferIsCreatedThenBufferMultiGraphicsAllocationsDontNeedMigrations, IsAtLeastGen12lp) { + cl_int retVal = 0; + MockDefaultContext context; + + std::unique_ptr buffer1(Buffer::create(&context, 0, MemoryConstants::pageSize, nullptr, retVal)); + + EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(buffer1->getMultiGraphicsAllocation().getGraphicsAllocation(1u)->getMemoryPool())); + EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(buffer1->getMultiGraphicsAllocation().getGraphicsAllocation(2u)->getMemoryPool())); + EXPECT_FALSE(buffer1->getMultiGraphicsAllocation().requiresMigrations()); +} + +using MultiRootDeviceBufferTest2 = ::testing::Test; +HWTEST2_F(MultiRootDeviceBufferTest2, WhenBufferIsCreatedThenSecondAndSubsequentAllocationsAreCreatedFromExisitingStorage, IsAtLeastGen12lp) { cl_int retVal = 0; MockDefaultContext context; auto memoryManager = static_cast(context.getMemoryManager()); @@ -2191,8 +2204,13 @@ TEST(MultiRootDeviceBufferTest2, WhenBufferIsCreatedThenSecondAndSubsequentAlloc EXPECT_EQ(memoryManager->allocationsFromExistingStorage[1], buffer->getMultiGraphicsAllocation().getGraphicsAllocation(2u)); } -TEST(MultiRootDeviceBufferTest2, givenHostPtrToCopyWhenBufferIsCreatedWithMultiStorageThenMemoryIsPutInFirstDeviceInContext) { +HWTEST2_F(MultiRootDeviceBufferTest2, givenHostPtrToCopyWhenBufferIsCreatedWithMultiStorageThenMemoryIsPutInFirstDeviceInContext, IsAtLeastGen12lp) { UltClDeviceFactory deviceFactory{2, 0}; + auto memoryManager = static_cast(deviceFactory.rootDevices[0]->getMemoryManager()); + for (auto &rootDeviceIndex : {0, 1}) { + memoryManager->localMemorySupported[rootDeviceIndex] = true; + } + { cl_device_id deviceIds[] = { deviceFactory.rootDevices[0], diff --git a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp index 892d241338..6a2eb0d7d2 100644 --- a/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp +++ b/opencl/test/unit_test/memory_manager/memory_manager_tests.cpp @@ -2713,7 +2713,7 @@ TEST_F(MemoryManagerMultiRootDeviceTests, WhenAllocatingGlobalSurfaceThenItHasCo if (device1->getMemoryManager()->isLimitedRange(expectedRootDeviceIndex)) { device1->getMemoryManager()->freeGraphicsMemory(allocation); } else { - context->getSVMAllocsManager()->freeSVMAlloc(allocation->getUnderlyingBuffer()); + context->getSVMAllocsManager()->freeSVMAlloc(reinterpret_cast(allocation->getGpuAddress())); } } diff --git a/opencl/test/unit_test/memory_manager/migraton_controller_tests.cpp b/opencl/test/unit_test/memory_manager/migraton_controller_tests.cpp index 69c889a0b3..a6bd70c875 100644 --- a/opencl/test/unit_test/memory_manager/migraton_controller_tests.cpp +++ b/opencl/test/unit_test/memory_manager/migraton_controller_tests.cpp @@ -27,6 +27,10 @@ struct MigrationControllerTests : public ::testing::Test { pCsr0 = context.getDevice(0)->getDefaultEngine().commandStreamReceiver; pCsr1 = context.getDevice(1)->getDefaultEngine().commandStreamReceiver; memoryManager = static_cast(context.getMemoryManager()); + for (auto &rootDeviceIndex : context.getRootDeviceIndices()) { + + memoryManager->localMemorySupported[rootDeviceIndex] = true; + } } void TearDown() override { } @@ -36,7 +40,7 @@ struct MigrationControllerTests : public ::testing::Test { MockMemoryManager *memoryManager = nullptr; }; -TEST_F(MigrationControllerTests, givenAllocationWithUndefinedLocationWhenHandleMigrationThenNoMigrationIsPerformedAndProperLocationIsSet) { +HWTEST2_F(MigrationControllerTests, givenAllocationWithUndefinedLocationWhenHandleMigrationThenNoMigrationIsPerformedAndProperLocationIsSet, IsAtLeastGen12lp) { std::unique_ptr pImage(Image1dHelper<>::create(&context)); EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); @@ -49,7 +53,7 @@ TEST_F(MigrationControllerTests, givenAllocationWithUndefinedLocationWhenHandleM EXPECT_EQ(0u, pCsr0->peekLatestFlushedTaskCount()); } -TEST_F(MigrationControllerTests, givenAllocationWithDefinedLocationWhenHandleMigrationToTheSameLocationThenDontMigrateMemory) { +HWTEST2_F(MigrationControllerTests, givenAllocationWithDefinedLocationWhenHandleMigrationToTheSameLocationThenDontMigrateMemory, IsAtLeastGen12lp) { std::unique_ptr pImage(Image1dHelper<>::create(&context)); EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); @@ -63,7 +67,7 @@ TEST_F(MigrationControllerTests, givenAllocationWithDefinedLocationWhenHandleMig EXPECT_EQ(0u, pCsr1->peekLatestFlushedTaskCount()); } -TEST_F(MigrationControllerTests, givenNotLockableImageAllocationWithDefinedLocationWhenHandleMigrationToDifferentLocationThenMigrateMemoryViaReadWriteImage) { +HWTEST2_F(MigrationControllerTests, givenNotLockableImageAllocationWithDefinedLocationWhenHandleMigrationToDifferentLocationThenMigrateMemoryViaReadWriteImage, IsAtLeastGen12lp) { REQUIRE_IMAGE_SUPPORT_OR_SKIP(&context); std::unique_ptr pImage(Image1dHelper<>::create(&context)); EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); @@ -88,7 +92,7 @@ TEST_F(MigrationControllerTests, givenNotLockableImageAllocationWithDefinedLocat EXPECT_EQ(1u, pCsr0->peekLatestFlushedTaskCount()); } -TEST_F(MigrationControllerTests, givenNotLockableBufferAllocationWithDefinedLocationWhenHandleMigrationToDifferentLocationThenMigrateMemoryViaReadWriteBuffer) { +HWTEST2_F(MigrationControllerTests, givenNotLockableBufferAllocationWithDefinedLocationWhenHandleMigrationToDifferentLocationThenMigrateMemoryViaReadWriteBuffer, IsAtLeastGen12lp) { DebugManagerStateRestore restorer; DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0); @@ -122,7 +126,7 @@ TEST_F(MigrationControllerTests, givenNotLockableBufferAllocationWithDefinedLoca EXPECT_EQ(1u, pCsr0->peekLatestFlushedTaskCount()); } -TEST_F(MigrationControllerTests, givenLockableBufferAllocationWithDefinedLocationWhenHandleMigrationToDifferentLocationThenMigrateMemoryViaLockMemory) { +HWTEST2_F(MigrationControllerTests, givenLockableBufferAllocationWithDefinedLocationWhenHandleMigrationToDifferentLocationThenMigrateMemoryViaLockMemory, IsAtLeastGen12lp) { std::unique_ptr pBuffer(BufferHelper<>::create(&context)); const_cast(pBuffer->getMultiGraphicsAllocation()).setMultiStorage(true); EXPECT_TRUE(pBuffer->getMultiGraphicsAllocation().requiresMigrations()); @@ -144,7 +148,7 @@ TEST_F(MigrationControllerTests, givenLockableBufferAllocationWithDefinedLocatio EXPECT_EQ(0u, pCsr0->peekLatestFlushedTaskCount()); } -TEST_F(MigrationControllerTests, givenMultiGraphicsAllocationUsedInOneCsrWhenHandlingMigrationToOtherCsrOnTheSameRootDeviceThenWaitOnCpuForTheFirstCsrCompletion) { +HWTEST2_F(MigrationControllerTests, givenMultiGraphicsAllocationUsedInOneCsrWhenHandlingMigrationToOtherCsrOnTheSameRootDeviceThenWaitOnCpuForTheFirstCsrCompletion, IsAtLeastGen12lp) { VariableBackup createFuncBackup{&MultiGraphicsAllocation::createMigrationSyncDataFunc}; MultiGraphicsAllocation::createMigrationSyncDataFunc = [](size_t size) -> MigrationSyncData * { return new MockMigrationSyncData(size); @@ -170,7 +174,7 @@ TEST_F(MigrationControllerTests, givenMultiGraphicsAllocationUsedInOneCsrWhenHan EXPECT_EQ(1u, migrationSyncData->waitOnCpuCalled); } -TEST_F(MigrationControllerTests, givenMultiGraphicsAllocationUsedInOneCsrWhenHandlingMigrationToTheSameCsrThenDontWaitOnCpu) { +HWTEST2_F(MigrationControllerTests, givenMultiGraphicsAllocationUsedInOneCsrWhenHandlingMigrationToTheSameCsrThenDontWaitOnCpu, IsAtLeastGen12lp) { VariableBackup createFuncBackup{&MultiGraphicsAllocation::createMigrationSyncDataFunc}; MultiGraphicsAllocation::createMigrationSyncDataFunc = [](size_t size) -> MigrationSyncData * { return new MockMigrationSyncData(size); @@ -197,7 +201,7 @@ TEST_F(MigrationControllerTests, givenMultiGraphicsAllocationUsedInOneCsrWhenHan EXPECT_EQ(0u, migrationSyncData->waitOnCpuCalled); } -TEST_F(MigrationControllerTests, whenHandleMigrationThenProperTagAddressAndTaskCountIsSet) { +HWTEST2_F(MigrationControllerTests, whenHandleMigrationThenProperTagAddressAndTaskCountIsSet, IsAtLeastGen12lp) { VariableBackup createFuncBackup{&MultiGraphicsAllocation::createMigrationSyncDataFunc}; MultiGraphicsAllocation::createMigrationSyncDataFunc = [](size_t size) -> MigrationSyncData * { return new MockMigrationSyncData(size); @@ -216,7 +220,7 @@ TEST_F(MigrationControllerTests, whenHandleMigrationThenProperTagAddressAndTaskC EXPECT_EQ(pCsr0->peekTaskCount() + 1, migrationSyncData->latestTaskCountUsed); } -TEST_F(MigrationControllerTests, givenWaitForTimestampsEnabledWhenHandleMigrationIsCalledThenSignalTaskCountBasedUsage) { +HWTEST2_F(MigrationControllerTests, givenWaitForTimestampsEnabledWhenHandleMigrationIsCalledThenSignalTaskCountBasedUsage, IsAtLeastGen12lp) { DebugManagerStateRestore restorer; DebugManager.flags.EnableTimestampWaitForQueues.set(4); @@ -237,7 +241,7 @@ TEST_F(MigrationControllerTests, givenWaitForTimestampsEnabledWhenHandleMigratio EXPECT_EQ(1u, migrationSyncData->signalUsageCalled); } -TEST_F(MigrationControllerTests, whenMemoryMigrationForMemoryObjectIsAlreadyInProgressThenDoEarlyReturn) { +HWTEST2_F(MigrationControllerTests, whenMemoryMigrationForMemoryObjectIsAlreadyInProgressThenDoEarlyReturn, IsAtLeastGen12lp) { std::unique_ptr pBuffer(BufferHelper<>::create(&context)); ASSERT_TRUE(pBuffer->getMultiGraphicsAllocation().requiresMigrations());