From 93e3d948f5f7e9367a6715bdf5e771a26320e95d Mon Sep 17 00:00:00 2001 From: Mateusz Jablonski Date: Thu, 21 Oct 2021 11:16:19 +0000 Subject: [PATCH] Fallback to cpu copy when filling work partition allocation move some command stream receiver tests to shared Related-To: NEO-6325 Signed-off-by: Mateusz Jablonski --- .../unit_test/command_stream/CMakeLists.txt | 2 +- .../cl_command_stream_receiver_tests.cpp | 131 ++++++++++++ .../drm_memory_manager_localmem_tests.cpp | 2 + .../linux/drm_memory_manager_tests.cpp | 57 +++++ .../windows/wddm_memory_manager_tests.cpp | 49 +++++ .../command_stream_receiver.cpp | 5 +- .../source/memory_manager/memory_manager.cpp | 14 ++ shared/source/memory_manager/memory_manager.h | 5 +- .../os_interface/linux/drm_memory_manager.cpp | 11 +- .../os_interface/linux/drm_memory_manager.h | 1 + .../windows/wddm_memory_manager.cpp | 27 +++ .../windows/wddm_memory_manager.h | 1 + ...memory_manager_allocate_in_device_pool.cpp | 5 +- .../test/common/mocks/mock_memory_manager.cpp | 6 + .../test/common/mocks/mock_memory_manager.h | 12 ++ .../unit_test/command_stream/CMakeLists.txt | 1 + .../command_stream_receiver_tests.cpp | 201 +++++------------- 17 files changed, 378 insertions(+), 152 deletions(-) create mode 100644 opencl/test/unit_test/command_stream/cl_command_stream_receiver_tests.cpp rename {opencl => shared}/test/unit_test/command_stream/command_stream_receiver_tests.cpp (91%) diff --git a/opencl/test/unit_test/command_stream/CMakeLists.txt b/opencl/test/unit_test/command_stream/CMakeLists.txt index f80c03e70e..148d4c5b95 100644 --- a/opencl/test/unit_test/command_stream/CMakeLists.txt +++ b/opencl/test/unit_test/command_stream/CMakeLists.txt @@ -11,6 +11,7 @@ set(IGDRCL_SRCS_tests_command_stream ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_3_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_file_stream_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_subcapture_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cl_command_stream_receiver_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_1_tests.cpp @@ -18,7 +19,6 @@ set(IGDRCL_SRCS_tests_command_stream ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests.inl ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_mt_tests.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_1_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_2_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_3_tests.cpp diff --git a/opencl/test/unit_test/command_stream/cl_command_stream_receiver_tests.cpp b/opencl/test/unit_test/command_stream/cl_command_stream_receiver_tests.cpp new file mode 100644 index 0000000000..9387ef94c9 --- /dev/null +++ b/opencl/test/unit_test/command_stream/cl_command_stream_receiver_tests.cpp @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2018-2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/memory_manager/surface.h" +#include "shared/test/common/fixtures/device_fixture.h" +#include "shared/test/common/mocks/mock_csr.h" + +#include "opencl/source/mem_obj/buffer.h" +#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" +#include "opencl/test/unit_test/helpers/raii_hw_helper.h" +#include "opencl/test/unit_test/mocks/mock_context.h" +#include "opencl/test/unit_test/mocks/mock_hw_helper.h" +#include "test.h" + +#include "gmock/gmock.h" + +using namespace NEO; + +TEST(ClCommandStreamReceiverTest, WhenMakingResidentThenBufferResidencyFlagIsSet) { + MockContext context; + auto commandStreamReceiver = context.getDevice(0)->getDefaultEngine().commandStreamReceiver; + float srcMemory[] = {1.0f}; + + auto retVal = CL_INVALID_VALUE; + auto buffer = Buffer::create( + &context, + CL_MEM_USE_HOST_PTR, + 
sizeof(srcMemory), + srcMemory, + retVal); + ASSERT_NE(nullptr, buffer); + + auto graphicsAllocation = buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); + EXPECT_FALSE(graphicsAllocation->isResident(commandStreamReceiver->getOsContext().getContextId())); + + commandStreamReceiver->makeResident(*graphicsAllocation); + + EXPECT_TRUE(graphicsAllocation->isResident(commandStreamReceiver->getOsContext().getContextId())); + + delete buffer; +} + +using ClCommandStreamReceiverTests = Test; + +HWTEST_F(ClCommandStreamReceiverTests, givenCommandStreamReceiverWhenFenceAllocationIsRequiredAndCreateGlobalFenceAllocationIsCalledThenFenceAllocationIsAllocated) { + RAIIHwHelperFactory> hwHelperBackup{pDevice->getHardwareInfo().platform.eRenderCoreFamily}; + + MockCsrHw csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + csr.setupContext(*pDevice->getDefaultEngine().osContext); + EXPECT_EQ(nullptr, csr.globalFenceAllocation); + + EXPECT_TRUE(csr.createGlobalFenceAllocation()); + + ASSERT_NE(nullptr, csr.globalFenceAllocation); + EXPECT_EQ(GraphicsAllocation::AllocationType::GLOBAL_FENCE, csr.globalFenceAllocation->getAllocationType()); +} + +HWTEST_F(ClCommandStreamReceiverTests, givenCommandStreamReceiverWhenGettingFenceAllocationThenCorrectFenceAllocationIsReturned) { + RAIIHwHelperFactory> hwHelperBackup{pDevice->getHardwareInfo().platform.eRenderCoreFamily}; + + CommandStreamReceiverHw csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + csr.setupContext(*pDevice->getDefaultEngine().osContext); + EXPECT_EQ(nullptr, csr.getGlobalFenceAllocation()); + + EXPECT_TRUE(csr.createGlobalFenceAllocation()); + + ASSERT_NE(nullptr, csr.getGlobalFenceAllocation()); + EXPECT_EQ(GraphicsAllocation::AllocationType::GLOBAL_FENCE, csr.getGlobalFenceAllocation()->getAllocationType()); +} + +using CommandStreamReceiverMultiRootDeviceTest = MultiRootDeviceFixture; + 
+TEST_F(CommandStreamReceiverMultiRootDeviceTest, WhenCreatingCommandStreamGraphicsAllocationsThenTheyHaveCorrectRootDeviceIndex) { + auto commandStreamReceiver = &device1->getGpgpuCommandStreamReceiver(); + + ASSERT_NE(nullptr, commandStreamReceiver); + EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getRootDeviceIndex()); + + // Linear stream / Command buffer + GraphicsAllocation *allocation = mockMemoryManager->allocateGraphicsMemoryWithProperties({expectedRootDeviceIndex, 128u, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device1->getDeviceBitfield()}); + LinearStream commandStream{allocation}; + + commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 100u, 0u); + EXPECT_EQ(allocation, commandStream.getGraphicsAllocation()); + EXPECT_EQ(128u, commandStream.getMaxAvailableSpace()); + EXPECT_EQ(expectedRootDeviceIndex, commandStream.getGraphicsAllocation()->getRootDeviceIndex()); + + commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 1024u, 0u); + EXPECT_NE(allocation, commandStream.getGraphicsAllocation()); + EXPECT_EQ(0u, commandStream.getMaxAvailableSpace() % MemoryConstants::pageSize64k); + EXPECT_EQ(expectedRootDeviceIndex, commandStream.getGraphicsAllocation()->getRootDeviceIndex()); + mockMemoryManager->freeGraphicsMemory(commandStream.getGraphicsAllocation()); + + // Debug surface + auto debugSurface = commandStreamReceiver->allocateDebugSurface(MemoryConstants::pageSize); + ASSERT_NE(nullptr, debugSurface); + EXPECT_EQ(expectedRootDeviceIndex, debugSurface->getRootDeviceIndex()); + + // Indirect heaps + IndirectHeap::Type heapTypes[]{IndirectHeap::DYNAMIC_STATE, IndirectHeap::INDIRECT_OBJECT, IndirectHeap::SURFACE_STATE}; + for (auto heapType : heapTypes) { + IndirectHeap *heap = nullptr; + commandStreamReceiver->allocateHeapMemory(heapType, MemoryConstants::pageSize, heap); + ASSERT_NE(nullptr, heap); + ASSERT_NE(nullptr, heap->getGraphicsAllocation()); + EXPECT_EQ(expectedRootDeviceIndex, 
heap->getGraphicsAllocation()->getRootDeviceIndex()); + mockMemoryManager->freeGraphicsMemory(heap->getGraphicsAllocation()); + delete heap; + } + + // Tag allocation + ASSERT_NE(nullptr, commandStreamReceiver->getTagAllocation()); + EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getTagAllocation()->getRootDeviceIndex()); + + // Preemption allocation + if (nullptr == commandStreamReceiver->getPreemptionAllocation()) { + commandStreamReceiver->createPreemptionAllocation(); + } + EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getPreemptionAllocation()->getRootDeviceIndex()); + + // HostPtr surface + char memory[8] = {1, 2, 3, 4, 5, 6, 7, 8}; + HostPtrSurface surface(memory, sizeof(memory), true); + EXPECT_TRUE(commandStreamReceiver->createAllocationForHostSurface(surface, false)); + ASSERT_NE(nullptr, surface.getAllocation()); + EXPECT_EQ(expectedRootDeviceIndex, surface.getAllocation()->getRootDeviceIndex()); +} diff --git a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_localmem_tests.cpp b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_localmem_tests.cpp index 1a03b2a4fe..0a51c1b099 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_localmem_tests.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_localmem_tests.cpp @@ -763,6 +763,7 @@ TEST_F(DrmMemoryManagerCopyMemoryToAllocationTest, givenDrmMemoryManagerWhenCopy allocData.flags.allocateMemory = true; allocData.type = GraphicsAllocation::AllocationType::KERNEL_ISA; allocData.rootDeviceIndex = rootDeviceIndex; + allocData.storageInfo.memoryBanks.set(0, true); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; auto allocation = drmMemoryManger.allocateGraphicsMemoryInDevicePool(allocData, status); ASSERT_NE(nullptr, allocation); @@ -785,6 +786,7 @@ TEST_F(DrmMemoryManagerCopyMemoryToAllocationTest, givenDrmMemoryManagerWhenCopy allocData.flags.allocateMemory = true; allocData.type = 
GraphicsAllocation::AllocationType::KERNEL_ISA; allocData.rootDeviceIndex = rootDeviceIndex; + allocData.storageInfo.memoryBanks.set(0, true); MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success; auto allocation = drmMemoryManger.allocateGraphicsMemoryInDevicePool(allocData, status); ASSERT_NE(nullptr, allocation); diff --git a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index d02b8179ae..a1d9657b29 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -5418,4 +5418,61 @@ TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenOversize memoryManager->freeGraphicsMemory(allocation); } +struct DrmMemoryManagerToTestCopyMemoryToAllocationBanks : public DrmMemoryManager { + DrmMemoryManagerToTestCopyMemoryToAllocationBanks(ExecutionEnvironment &executionEnvironment, size_t lockableLocalMemorySize) + : DrmMemoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, false, false, executionEnvironment) { + lockedLocalMemorySize = lockableLocalMemorySize; + } + void *lockResourceInLocalMemoryImpl(BufferObject *bo) override { + if (lockedLocalMemorySize > 0) { + if (static_cast(bo->peekHandle()) < lockedLocalMemory.size()) { + lockedLocalMemory[bo->peekHandle()].reset(new uint8_t[lockedLocalMemorySize]); + return lockedLocalMemory[bo->peekHandle()].get(); + } + } + return nullptr; + } + void unlockResourceInLocalMemoryImpl(BufferObject *bo) override { + } + std::array, 4> lockedLocalMemory; + size_t lockedLocalMemorySize = 0; +}; + +TEST(DrmMemoryManagerCopyMemoryToAllocationBanksTest, givenDrmMemoryManagerWhenCopyMemoryToAllocationOnSpecificMemoryBanksThenAllocationIsFilledWithCorrectDataOnSpecificBanks) { + uint8_t sourceData[64]{}; + size_t offset = 3; + size_t sourceAllocationSize = sizeof(sourceData); + size_t 
destinationAllocationSize = sourceAllocationSize + offset; + MockExecutionEnvironment executionEnvironment; + auto drm = new DrmMock(mockFd, *executionEnvironment.rootDeviceEnvironments[0]); + executionEnvironment.rootDeviceEnvironments[0]->osInterface.reset(new OSInterface()); + executionEnvironment.rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(drm)); + DrmMemoryManagerToTestCopyMemoryToAllocationBanks drmMemoryManger(executionEnvironment, destinationAllocationSize); + std::vector dataToCopy(sourceAllocationSize, 1u); + + MockDrmAllocation mockAllocation(GraphicsAllocation::AllocationType::WORK_PARTITION_SURFACE, MemoryPool::LocalMemory); + + mockAllocation.storageInfo.memoryBanks = 0b1110; + DeviceBitfield memoryBanksToCopy = 0b1010; + mockAllocation.bufferObjects.clear(); + + for (auto index = 0u; index < 4; index++) { + drmMemoryManger.lockedLocalMemory[index].reset(); + mockAllocation.bufferObjects.push_back(new BufferObject(drm, index, sourceAllocationSize, 3)); + } + + auto ret = drmMemoryManger.copyMemoryToAllocationBanks(&mockAllocation, offset, dataToCopy.data(), dataToCopy.size(), memoryBanksToCopy); + EXPECT_TRUE(ret); + + EXPECT_EQ(nullptr, drmMemoryManger.lockedLocalMemory[0].get()); + ASSERT_NE(nullptr, drmMemoryManger.lockedLocalMemory[1].get()); + EXPECT_EQ(nullptr, drmMemoryManger.lockedLocalMemory[2].get()); + ASSERT_NE(nullptr, drmMemoryManger.lockedLocalMemory[3].get()); + + EXPECT_EQ(0, memcmp(ptrOffset(drmMemoryManger.lockedLocalMemory[1].get(), offset), dataToCopy.data(), dataToCopy.size())); + EXPECT_EQ(0, memcmp(ptrOffset(drmMemoryManger.lockedLocalMemory[3].get(), offset), dataToCopy.data(), dataToCopy.size())); + for (auto index = 0u; index < 4; index++) { + delete mockAllocation.bufferObjects[index]; + } +} } // namespace NEO diff --git a/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp b/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp index c7e9d17651..97e14dc0fc 
100644 --- a/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp +++ b/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp @@ -2497,3 +2497,52 @@ TEST_F(WddmMemoryManagerSimpleTest, whenAlignmentRequirementExceedsPageSizeThenA EXPECT_EQ(0U, memoryManager.callCount.allocateGraphicsMemoryUsingKmdAndMapItToCpuVA); } } + +struct WddmWithMockedLock : public WddmMock { + using WddmMock::WddmMock; + + void *lockResource(const D3DKMT_HANDLE &handle, bool applyMakeResidentPriorToLock, size_t size) override { + if (handle < storageLocked.size()) { + storageLocked.set(handle); + } + return storages[handle]; + } + std::bitset<4> storageLocked{}; + uint8_t storages[EngineLimits::maxHandleCount][MemoryConstants::pageSize64k] = {0u}; +}; + +TEST(WddmMemoryManagerCopyMemoryToAllocationBanksTest, givenAllocationWithMultiTilePlacementWhenCopyDataSpecificMemoryBanksThenLockOnlySpecificStorages) { + uint8_t sourceData[32]{}; + size_t offset = 3; + size_t sourceAllocationSize = sizeof(sourceData); + auto hwInfo = *defaultHwInfo; + hwInfo.featureTable.ftrLocalMemory = true; + + MockExecutionEnvironment executionEnvironment(&hwInfo); + executionEnvironment.initGmm(); + auto wddm = new WddmWithMockedLock(*executionEnvironment.rootDeviceEnvironments[0]); + wddm->init(); + MemoryManagerCreate memoryManager(true, true, executionEnvironment); + + MockWddmAllocation mockAllocation(executionEnvironment.rootDeviceEnvironments[0]->getGmmClientContext()); + + mockAllocation.storageInfo.memoryBanks = 0b1110; + DeviceBitfield memoryBanksToCopy = 0b1010; + mockAllocation.handles.resize(4); + for (auto index = 0u; index < 4; index++) { + wddm->storageLocked.set(index, false); + if (mockAllocation.storageInfo.memoryBanks.test(index)) { + mockAllocation.handles[index] = index; + } + } + std::vector dataToCopy(sourceAllocationSize, 1u); + auto ret = memoryManager.copyMemoryToAllocationBanks(&mockAllocation, offset, dataToCopy.data(), dataToCopy.size(), 
memoryBanksToCopy); + EXPECT_TRUE(ret); + + EXPECT_FALSE(wddm->storageLocked.test(0)); + ASSERT_TRUE(wddm->storageLocked.test(1)); + EXPECT_FALSE(wddm->storageLocked.test(2)); + ASSERT_TRUE(wddm->storageLocked.test(3)); + EXPECT_EQ(0, memcmp(ptrOffset(wddm->storages[1], offset), dataToCopy.data(), dataToCopy.size())); + EXPECT_EQ(0, memcmp(ptrOffset(wddm->storages[3], offset), dataToCopy.data(), dataToCopy.size())); +} \ No newline at end of file diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 29c5260929..52b278d735 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -570,12 +570,11 @@ bool CommandStreamReceiver::createWorkPartitionAllocation(const Device &device) } const uint32_t copySrc = deviceIndex; - const Vec3 copySrcSize = {sizeof(copySrc), 1, 1}; DeviceBitfield copyBitfield{}; copyBitfield.set(deviceIndex); - BlitOperationResult blitResult = BlitHelper::blitMemoryToAllocationBanks(device, workPartitionAllocation, 0, &copySrc, copySrcSize, copyBitfield); + auto copySuccess = MemoryTransferHelper::transferMemoryToAllocationBanks(device, workPartitionAllocation, 0, &copySrc, sizeof(copySrc), copyBitfield); - if (blitResult != BlitOperationResult::Success) { + if (!copySuccess) { return false; } } diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index 69ab681808..ba28ea3bb4 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -688,6 +688,11 @@ bool MemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocatio return true; } +bool MemoryManager::copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield dstMemoryBanks) { + 
memcpy_s(ptrOffset(static_cast(graphicsAllocation->getUnderlyingBuffer()), destinationOffset), + (graphicsAllocation->getUnderlyingBufferSize() - destinationOffset), memoryToCopy, sizeToCopy); + return true; +} void MemoryManager::waitForEnginesCompletion(GraphicsAllocation &graphicsAllocation) { for (auto &engine : getRegisteredEngines()) { auto osContextId = engine.osContext->getContextId(); @@ -864,4 +869,13 @@ bool MemoryTransferHelper::transferMemoryToAllocation(bool useBlitter, const Dev } return device.getMemoryManager()->copyMemoryToAllocation(dstAllocation, dstOffset, srcMemory, srcSize); } +bool MemoryTransferHelper::transferMemoryToAllocationBanks(const Device &device, GraphicsAllocation *dstAllocation, size_t dstOffset, const void *srcMemory, + size_t srcSize, DeviceBitfield dstMemoryBanks) { + auto blitSuccess = BlitHelper::blitMemoryToAllocationBanks(device, dstAllocation, dstOffset, srcMemory, {srcSize, 1, 1}, dstMemoryBanks) == BlitOperationResult::Success; + + if (!blitSuccess) { + return device.getMemoryManager()->copyMemoryToAllocationBanks(dstAllocation, dstOffset, srcMemory, srcSize, dstMemoryBanks); + } + return true; +} } // namespace NEO diff --git a/shared/source/memory_manager/memory_manager.h b/shared/source/memory_manager/memory_manager.h index b58e77d04d..d61f18a982 100644 --- a/shared/source/memory_manager/memory_manager.h +++ b/shared/source/memory_manager/memory_manager.h @@ -56,7 +56,9 @@ constexpr size_t paddingBufferSize = 2 * MemoryConstants::megaByte; namespace MemoryTransferHelper { bool transferMemoryToAllocation(bool useBlitter, const Device &device, GraphicsAllocation *dstAllocation, size_t dstOffset, const void *srcMemory, size_t srcSize); -} +bool transferMemoryToAllocationBanks(const Device &device, GraphicsAllocation *dstAllocation, size_t dstOffset, const void *srcMemory, + size_t srcSize, DeviceBitfield dstMemoryBanks); +} // namespace MemoryTransferHelper class MemoryManager { public: @@ -191,6 +193,7 @@ class 
MemoryManager { HostPtrManager *getHostPtrManager() const { return hostPtrManager.get(); } void setDefaultEngineIndex(uint32_t rootDeviceIndex, uint32_t engineIndex) { defaultEngineIndex[rootDeviceIndex] = engineIndex; } virtual bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy); + virtual bool copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield dstMemoryBanks); HeapIndex selectHeap(const GraphicsAllocation *allocation, bool hasPointer, bool isFullRangeSVM, bool useFrontWindow); static std::unique_ptr createMemoryManager(ExecutionEnvironment &executionEnvironment, DriverModelType driverModel = DriverModelType::UNKNOWN); virtual void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) { return nullptr; }; diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index 1e25b88bec..fe760fa6a3 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -1111,8 +1111,17 @@ bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAlloca if (graphicsAllocation->getUnderlyingBuffer() || !isLocalMemorySupported(graphicsAllocation->getRootDeviceIndex())) { return MemoryManager::copyMemoryToAllocation(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy); } + return copyMemoryToAllocationBanks(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy, graphicsAllocation->storageInfo.memoryBanks); +} +bool DrmMemoryManager::copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield dstMemoryBanks) { + if (MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool())) { + return false; + } auto drmAllocation 
= static_cast(graphicsAllocation); - for (auto handleId = 0u; handleId < graphicsAllocation->storageInfo.getNumBanks(); handleId++) { + for (auto handleId = 0u; handleId < graphicsAllocation->storageInfo.getTotalBanksCnt(); handleId++) { + if (!dstMemoryBanks.test(handleId)) { + continue; + } auto ptr = lockResourceInLocalMemoryImpl(drmAllocation->getBOs()[handleId]); if (!ptr) { return false; diff --git a/shared/source/os_interface/linux/drm_memory_manager.h b/shared/source/os_interface/linux/drm_memory_manager.h index 24385bfdb5..b40c784277 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.h +++ b/shared/source/os_interface/linux/drm_memory_manager.h @@ -57,6 +57,7 @@ class DrmMemoryManager : public MemoryManager { DrmGemCloseWorker *peekGemCloseWorker() const { return this->gemCloseWorker.get(); } bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) override; + bool copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield dstMemoryBanks) override; MOCKABLE_VIRTUAL int obtainFdFromHandle(int boHandle, uint32_t rootDeviceindex); AddressRange reserveGpuAddress(size_t size, uint32_t rootDeviceIndex) override; diff --git a/shared/source/os_interface/windows/wddm_memory_manager.cpp b/shared/source/os_interface/windows/wddm_memory_manager.cpp index 22cc232f62..95c7464cfb 100644 --- a/shared/source/os_interface/windows/wddm_memory_manager.cpp +++ b/shared/source/os_interface/windows/wddm_memory_manager.cpp @@ -18,6 +18,7 @@ #include "shared/source/helpers/heap_assigner.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/ptr_math.h" +#include "shared/source/helpers/string.h" #include "shared/source/helpers/surface_format_info.h" #include "shared/source/memory_manager/deferrable_deletion.h" #include "shared/source/memory_manager/deferred_deleter.h" 
@@ -857,4 +858,30 @@ bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) { return inputPointerReadDelta > slownessFactor * fastestLocalRead; } +bool WddmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) { + if (graphicsAllocation->getUnderlyingBuffer()) { + return MemoryManager::copyMemoryToAllocation(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy); + } + return copyMemoryToAllocationBanks(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy, graphicsAllocation->storageInfo.memoryBanks); +} + +bool WddmMemoryManager::copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield dstMemoryBanks) { + if (MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool())) { + return false; + } + auto wddmAllocation = static_cast(graphicsAllocation); + for (auto handleId = 0u; handleId < graphicsAllocation->storageInfo.getTotalBanksCnt(); handleId++) { + if (!dstMemoryBanks.test(handleId)) { + continue; + } + auto ptr = getWddm(graphicsAllocation->getRootDeviceIndex()).lockResource(wddmAllocation->getHandles()[handleId], wddmAllocation->needsMakeResidentBeforeLock, wddmAllocation->getAlignedSize()); + if (!ptr) { + return false; + } + memcpy_s(ptrOffset(ptr, destinationOffset), graphicsAllocation->getUnderlyingBufferSize() - destinationOffset, memoryToCopy, sizeToCopy); + getWddm(graphicsAllocation->getRootDeviceIndex()).unlockResource(wddmAllocation->getHandles()[handleId]); + } + return true; +} + } // namespace NEO diff --git a/shared/source/os_interface/windows/wddm_memory_manager.h b/shared/source/os_interface/windows/wddm_memory_manager.h index 00908d1062..5840bb9d7b 100644 --- a/shared/source/os_interface/windows/wddm_memory_manager.h +++ b/shared/source/os_interface/windows/wddm_memory_manager.h @@ -65,6 +65,7 @@ class WddmMemoryManager : 
public MemoryManager { AlignedMallocRestrictions *getAlignedMallocRestrictions() override; bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) override; + bool copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield dstMemoryBanks) override; void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) override; void releaseReservedCpuAddressRange(void *reserved, size_t size, uint32_t rootDeviceIndex) override; bool isCpuCopyRequired(const void *ptr) override; diff --git a/shared/source/os_interface/windows/wddm_memory_manager_allocate_in_device_pool.cpp b/shared/source/os_interface/windows/wddm_memory_manager_allocate_in_device_pool.cpp index 199e56cb11..1f1a4d0c5b 100644 --- a/shared/source/os_interface/windows/wddm_memory_manager_allocate_in_device_pool.cpp +++ b/shared/source/os_interface/windows/wddm_memory_manager_allocate_in_device_pool.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -13,9 +13,6 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryInDevicePool(const status = AllocationStatus::RetryInNonDevicePool; return nullptr; } -bool WddmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) { - return MemoryManager::copyMemoryToAllocation(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy); -} bool WddmMemoryManager::mapGpuVirtualAddress(WddmAllocation *allocation, const void *requiredPtr) { if (allocation->getNumGmms() > 1) { return mapMultiHandleAllocationWithRetry(allocation, requiredPtr); diff --git a/shared/test/common/mocks/mock_memory_manager.cpp b/shared/test/common/mocks/mock_memory_manager.cpp index 969597962c..7204a9cf08 100644 --- 
a/shared/test/common/mocks/mock_memory_manager.cpp +++ b/shared/test/common/mocks/mock_memory_manager.cpp @@ -157,6 +157,12 @@ GraphicsAllocation *MockMemoryManager::createGraphicsAllocationFromExistingStora return allocation; } +bool MockMemoryManager::copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield dstMemoryBanks) { + copyMemoryToAllocationBanksCalled++; + copyMemoryToAllocationBanksParamsPassed.push_back({graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy, dstMemoryBanks}); + return OsAgnosticMemoryManager::copyMemoryToAllocationBanks(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy, dstMemoryBanks); +}; + FailMemoryManager::FailMemoryManager(int32_t failedAllocationsCount, ExecutionEnvironment &executionEnvironment) : MockMemoryManager(executionEnvironment) { this->failedAllocationsCount = failedAllocationsCount; } diff --git a/shared/test/common/mocks/mock_memory_manager.h b/shared/test/common/mocks/mock_memory_manager.h index 6a5ce31898..f585e4e1b3 100644 --- a/shared/test/common/mocks/mock_memory_manager.h +++ b/shared/test/common/mocks/mock_memory_manager.h @@ -149,6 +149,18 @@ class MockMemoryManager : public MemoryManagerCreate { return MemoryManager::setMemAdvise(gfxAllocation, flags, rootDeviceIndex); } + struct CopyMemoryToAllocationBanksParams { + GraphicsAllocation *graphicsAllocation = nullptr; + size_t destinationOffset = 0u; + const void *memoryToCopy = nullptr; + size_t sizeToCopy = 0u; + DeviceBitfield dstMemoryBanks = {}; + }; + + StackVec copyMemoryToAllocationBanksParamsPassed{}; + bool copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield dstMemoryBanks) override; + + uint32_t copyMemoryToAllocationBanksCalled = 0u; uint32_t freeGraphicsMemoryCalled = 0u; uint32_t unlockResourceCalled = 0u; uint32_t 
lockResourceCalled = 0u; diff --git a/shared/test/unit_test/command_stream/CMakeLists.txt b/shared/test/unit_test/command_stream/CMakeLists.txt index fb7d58e41d..da229cbeee 100644 --- a/shared/test/unit_test/command_stream/CMakeLists.txt +++ b/shared/test/unit_test/command_stream/CMakeLists.txt @@ -6,6 +6,7 @@ target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}stream_properties_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/stream_properties_tests_common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/stream_properties_tests_common.h diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp similarity index 91% rename from opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp rename to shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 5b8e41a21f..4e33478905 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -1,60 +1,41 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ -#include "shared/source/command_stream/command_stream_receiver.h" +#include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_stream/command_stream_receiver_simulated_hw.h" -#include "shared/source/command_stream/linear_stream.h" -#include "shared/source/command_stream/preemption.h" -#include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/gmm_helper/page_table_mngr.h" -#include "shared/source/helpers/cache_policy.h" -#include "shared/source/helpers/hw_helper.h" -#include "shared/source/helpers/timestamp_packet.h" -#include "shared/source/memory_manager/graphics_allocation.h" #include 
"shared/source/memory_manager/internal_allocation_storage.h" -#include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/utilities/tag_allocator.h" +#include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_allocation_properties.h" -#include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_execution_environment.h" -#include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/matchers.h" #include "shared/test/common/test_macros/test_checks_shared.h" #include "shared/test/unit_test/direct_submission/direct_submission_controller_mock.h" -#include "opencl/source/mem_obj/buffer.h" -#include "opencl/source/platform/platform.h" -#include "opencl/test/unit_test/fixtures/cl_device_fixture.h" -#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" -#include "opencl/test/unit_test/helpers/raii_hw_helper.h" -#include "opencl/test/unit_test/mocks/mock_buffer.h" -#include "opencl/test/unit_test/mocks/mock_context.h" -#include "opencl/test/unit_test/mocks/mock_hw_helper.h" -#include "opencl/test/unit_test/mocks/mock_platform.h" -#include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" #include "gmock/gmock.h" using namespace NEO; -struct CommandStreamReceiverTest : public ClDeviceFixture, +struct CommandStreamReceiverTest : public DeviceFixture, public ::testing::Test { void SetUp() override { - 
ClDeviceFixture::SetUp(); + DeviceFixture::SetUp(); commandStreamReceiver = &pDevice->getGpgpuCommandStreamReceiver(); ASSERT_NE(nullptr, commandStreamReceiver); @@ -63,7 +44,7 @@ struct CommandStreamReceiverTest : public ClDeviceFixture, } void TearDown() override { - ClDeviceFixture::TearDown(); + DeviceFixture::TearDown(); } CommandStreamReceiver *commandStreamReceiver; @@ -106,29 +87,6 @@ HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenFlagsAreSetCorrectly) { EXPECT_FALSE(csr.lastSentUseGlobalAtomics); } -TEST_F(CommandStreamReceiverTest, WhenMakingResidentThenBufferResidencyFlagIsSet) { - MockContext context; - float srcMemory[] = {1.0f}; - - auto retVal = CL_INVALID_VALUE; - auto buffer = Buffer::create( - &context, - CL_MEM_USE_HOST_PTR, - sizeof(srcMemory), - srcMemory, - retVal); - ASSERT_NE(nullptr, buffer); - - auto graphicsAllocation = buffer->getGraphicsAllocation(context.getDevice(0)->getRootDeviceIndex()); - EXPECT_FALSE(graphicsAllocation->isResident(commandStreamReceiver->getOsContext().getContextId())); - - commandStreamReceiver->makeResident(*graphicsAllocation); - - EXPECT_TRUE(graphicsAllocation->isResident(commandStreamReceiver->getOsContext().getContextId())); - - delete buffer; -} - TEST_F(CommandStreamReceiverTest, givenBaseDownloadAllocationCalledThenDoesNotChangeAnything) { auto *memoryManager = commandStreamReceiver->getMemoryManager(); @@ -881,32 +839,6 @@ TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenInitializeTa } } -HWTEST_F(CommandStreamReceiverTest, givenCommandStreamReceiverWhenFenceAllocationIsRequiredAndCreateGlobalFenceAllocationIsCalledThenFenceAllocationIsAllocated) { - RAIIHwHelperFactory> hwHelperBackup{pDevice->getHardwareInfo().platform.eRenderCoreFamily}; - - MockCsrHw csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - csr.setupContext(*pDevice->getDefaultEngine().osContext); - EXPECT_EQ(nullptr, csr.globalFenceAllocation); - - 
EXPECT_TRUE(csr.createGlobalFenceAllocation()); - - ASSERT_NE(nullptr, csr.globalFenceAllocation); - EXPECT_EQ(GraphicsAllocation::AllocationType::GLOBAL_FENCE, csr.globalFenceAllocation->getAllocationType()); -} - -HWTEST_F(CommandStreamReceiverTest, givenCommandStreamReceiverWhenGettingFenceAllocationThenCorrectFenceAllocationIsReturned) { - RAIIHwHelperFactory> hwHelperBackup{pDevice->getHardwareInfo().platform.eRenderCoreFamily}; - - CommandStreamReceiverHw csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); - csr.setupContext(*pDevice->getDefaultEngine().osContext); - EXPECT_EQ(nullptr, csr.getGlobalFenceAllocation()); - - EXPECT_TRUE(csr.createGlobalFenceAllocation()); - - ASSERT_NE(nullptr, csr.getGlobalFenceAllocation()); - EXPECT_EQ(GraphicsAllocation::AllocationType::GLOBAL_FENCE, csr.getGlobalFenceAllocation()->getAllocationType()); -} - TEST(CommandStreamReceiverSimpleTest, givenNullHardwareDebugModeWhenInitializeTagAllocationIsCalledThenTagAllocationIsBeingAllocatedAndinitialValueIsMinusOne) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableNullHardware.set(true); @@ -1129,9 +1061,7 @@ TEST(CommandStreamReceiverSimpleTest, givenMultipleActivePartitionsWhenWaitingFo } TEST(CommandStreamReceiverMultiContextTests, givenMultipleCsrsWhenSameResourcesAreUsedThenResidencyIsProperlyHandled) { - auto executionEnvironment = platform()->peekExecutionEnvironment(); - - std::unique_ptr device(Device::create(executionEnvironment, 0u)); + std::unique_ptr device(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get(), 0u)); auto &commandStreamReceiver0 = *device->commandStreamReceivers[0]; auto &commandStreamReceiver1 = *device->commandStreamReceivers[1]; @@ -1166,16 +1096,14 @@ TEST(CommandStreamReceiverMultiContextTests, givenMultipleCsrsWhenSameResourcesA struct CreateAllocationForHostSurfaceTest : public ::testing::Test { void SetUp() override { - executionEnvironment = 
platform()->peekExecutionEnvironment(); - executionEnvironment->prepareRootDeviceEnvironments(1u); - executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hwInfo); - gmockMemoryManager = new ::testing::NiceMock(*executionEnvironment); - executionEnvironment->memoryManager.reset(gmockMemoryManager); - device.reset(MockDevice::create(executionEnvironment, 0u)); + executionEnvironment.incRefInternal(); + gmockMemoryManager = new ::testing::NiceMock(executionEnvironment); + executionEnvironment.memoryManager.reset(gmockMemoryManager); + device.reset(MockDevice::createWithExecutionEnvironment(&hwInfo, &executionEnvironment, 0u)); commandStreamReceiver = &device->getGpgpuCommandStreamReceiver(); } + MockExecutionEnvironment executionEnvironment; HardwareInfo hwInfo = *defaultHwInfo; - ExecutionEnvironment *executionEnvironment = nullptr; GMockMemoryManager *gmockMemoryManager = nullptr; std::unique_ptr device; CommandStreamReceiver *commandStreamReceiver = nullptr; @@ -1383,6 +1311,7 @@ HWTEST_F(CommandStreamReceiverTest, whenCreatingCommandStreamReceiverThenLastAdd HWTEST_F(CommandStreamReceiverTest, givenDebugFlagWhenCreatingCsrThenSetEnableStaticPartitioningAccordingly) { DebugManagerStateRestore restore{}; + VariableBackup backup(&ImplicitScaling::apiSupport, true); { UltDeviceFactory deviceFactory{1, 2}; @@ -1548,64 +1477,6 @@ HWTEST_F(SimulatedCommandStreamReceiverTest, givenOsContextWithNoDeviceBitfieldW EXPECT_EQ(0u, csr.getDeviceIndex()); } -using CommandStreamReceiverMultiRootDeviceTest = MultiRootDeviceFixture; - -TEST_F(CommandStreamReceiverMultiRootDeviceTest, WhenCreatingCommandStreamGraphicsAllocationsThenTheyHaveCorrectRootDeviceIndex) { - auto commandStreamReceiver = &device1->getGpgpuCommandStreamReceiver(); - - ASSERT_NE(nullptr, commandStreamReceiver); - EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getRootDeviceIndex()); - - // Linear stream / Command buffer - GraphicsAllocation *allocation = 
mockMemoryManager->allocateGraphicsMemoryWithProperties({expectedRootDeviceIndex, 128u, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device1->getDeviceBitfield()}); - LinearStream commandStream{allocation}; - - commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 100u, 0u); - EXPECT_EQ(allocation, commandStream.getGraphicsAllocation()); - EXPECT_EQ(128u, commandStream.getMaxAvailableSpace()); - EXPECT_EQ(expectedRootDeviceIndex, commandStream.getGraphicsAllocation()->getRootDeviceIndex()); - - commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 1024u, 0u); - EXPECT_NE(allocation, commandStream.getGraphicsAllocation()); - EXPECT_EQ(0u, commandStream.getMaxAvailableSpace() % MemoryConstants::pageSize64k); - EXPECT_EQ(expectedRootDeviceIndex, commandStream.getGraphicsAllocation()->getRootDeviceIndex()); - mockMemoryManager->freeGraphicsMemory(commandStream.getGraphicsAllocation()); - - // Debug surface - auto debugSurface = commandStreamReceiver->allocateDebugSurface(MemoryConstants::pageSize); - ASSERT_NE(nullptr, debugSurface); - EXPECT_EQ(expectedRootDeviceIndex, debugSurface->getRootDeviceIndex()); - - // Indirect heaps - IndirectHeap::Type heapTypes[]{IndirectHeap::DYNAMIC_STATE, IndirectHeap::INDIRECT_OBJECT, IndirectHeap::SURFACE_STATE}; - for (auto heapType : heapTypes) { - IndirectHeap *heap = nullptr; - commandStreamReceiver->allocateHeapMemory(heapType, MemoryConstants::pageSize, heap); - ASSERT_NE(nullptr, heap); - ASSERT_NE(nullptr, heap->getGraphicsAllocation()); - EXPECT_EQ(expectedRootDeviceIndex, heap->getGraphicsAllocation()->getRootDeviceIndex()); - mockMemoryManager->freeGraphicsMemory(heap->getGraphicsAllocation()); - delete heap; - } - - // Tag allocation - ASSERT_NE(nullptr, commandStreamReceiver->getTagAllocation()); - EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getTagAllocation()->getRootDeviceIndex()); - - // Preemption allocation - if (nullptr == 
commandStreamReceiver->getPreemptionAllocation()) { - commandStreamReceiver->createPreemptionAllocation(); - } - EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getPreemptionAllocation()->getRootDeviceIndex()); - - // HostPtr surface - char memory[8] = {1, 2, 3, 4, 5, 6, 7, 8}; - HostPtrSurface surface(memory, sizeof(memory), true); - EXPECT_TRUE(commandStreamReceiver->createAllocationForHostSurface(surface, false)); - ASSERT_NE(nullptr, surface.getAllocation()); - EXPECT_EQ(expectedRootDeviceIndex, surface.getAllocation()->getRootDeviceIndex()); -} - using CommandStreamReceiverPageTableManagerTest = ::testing::Test; TEST_F(CommandStreamReceiverPageTableManagerTest, givenExistingPageTableManagerWhenNeedsPageTableManagerIsCalledThenFalseIsReturned) { @@ -1632,3 +1503,49 @@ TEST_F(CommandStreamReceiverPageTableManagerTest, givenNonExisitingPageTableMana EXPECT_EQ(supportsPageTableManager, commandStreamReceiver.needsPageTableManager()); } + +TEST(CreateWorkPartitionAllocationTest, givenDisabledBlitterWhenInitializingWorkPartitionAllocationThenFallbackToCpuCopy) { + DebugManagerStateRestore restore{}; + VariableBackup backup(&ImplicitScaling::apiSupport, true); + + UltDeviceFactory deviceFactory{1, 2}; + MockDevice &device = *deviceFactory.rootDevices[0]; + + auto memoryManager = static_cast(device.getMemoryManager()); + auto commandStreamReceiver = device.getDefaultEngine().commandStreamReceiver; + memoryManager->freeGraphicsMemory(commandStreamReceiver->getWorkPartitionAllocation()); + + DebugManager.flags.EnableBlitterOperationsSupport.set(0); + memoryManager->copyMemoryToAllocationBanksCalled = 0u; + memoryManager->copyMemoryToAllocationBanksParamsPassed.clear(); + auto retVal = commandStreamReceiver->createWorkPartitionAllocation(device); + EXPECT_TRUE(retVal); + EXPECT_EQ(2u, memoryManager->copyMemoryToAllocationBanksCalled); + EXPECT_EQ(deviceFactory.subDevices[0]->getDeviceBitfield(), 
memoryManager->copyMemoryToAllocationBanksParamsPassed[0].dstMemoryBanks); + EXPECT_EQ(deviceFactory.subDevices[1]->getDeviceBitfield(), memoryManager->copyMemoryToAllocationBanksParamsPassed[1].dstMemoryBanks); + for (auto i = 0; i < 2; i++) { + EXPECT_EQ(commandStreamReceiver->getWorkPartitionAllocation(), memoryManager->copyMemoryToAllocationBanksParamsPassed[i].graphicsAllocation); + EXPECT_EQ(sizeof(uint32_t), memoryManager->copyMemoryToAllocationBanksParamsPassed[i].sizeToCopy); + EXPECT_NE(nullptr, memoryManager->copyMemoryToAllocationBanksParamsPassed[i].memoryToCopy); + } +} + +TEST(CreateWorkPartitionAllocationTest, givenEnabledBlitterWhenInitializingWorkPartitionAllocationThenDontCopyOnCpu) { + DebugManagerStateRestore restore{}; + VariableBackup backup(&ImplicitScaling::apiSupport, true); + + UltDeviceFactory deviceFactory{1, 2}; + MockDevice &device = *deviceFactory.rootDevices[0]; + auto memoryManager = static_cast(device.getMemoryManager()); + auto commandStreamReceiver = device.getDefaultEngine().commandStreamReceiver; + + device.getRootDeviceEnvironmentRef().getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; + REQUIRE_BLITTER_OR_SKIP(&device.getHardwareInfo()); + memoryManager->freeGraphicsMemory(commandStreamReceiver->getWorkPartitionAllocation()); + + memoryManager->copyMemoryToAllocationBanksCalled = 0u; + memoryManager->copyMemoryToAllocationBanksParamsPassed.clear(); + auto retVal = commandStreamReceiver->createWorkPartitionAllocation(device); + EXPECT_TRUE(retVal); + EXPECT_EQ(0u, memoryManager->copyMemoryToAllocationBanksCalled); +} \ No newline at end of file