diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e1d8d0a02..6e56dda815 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -940,11 +940,9 @@ else() include_directories(${NEO_SHARED_DIRECTORY}/gmm_helper/windows/gmm_memory) endif() include_directories(${NEO_SHARED_DIRECTORY}/helpers/definitions${BRANCH_DIR_SUFFIX}) -include_directories(${NEO_SHARED_DIRECTORY}/memory_manager/definitions${BRANCH_DIR_SUFFIX}) include_directories(${NEO_SHARED_DIRECTORY}/memory_properties${BRANCH_DIR_SUFFIX}) include_directories(${NEO_SHARED_DIRECTORY}/sku_info/definitions${BRANCH_DIR_SUFFIX}) include_directories(${NEO_SOURCE_DIR}/opencl/source/command_queue/definitions${BRANCH_DIR_SUFFIX}) -include_directories(${NEO_SOURCE_DIR}/opencl/source/command_stream/definitions${BRANCH_DIR_SUFFIX}) include_directories(${NEO_SOURCE_DIR}/opencl/source/mem_obj/definitions${BRANCH_DIR_SUFFIX}) include_directories(${NEO_SOURCE_DIR}/opencl/source/memory_manager/definitions${BRANCH_DIR_SUFFIX}) if(MSVC) diff --git a/opencl/source/command_stream/CMakeLists.txt b/opencl/source/command_stream/CMakeLists.txt index ed1fb0603b..e6396d8025 100644 --- a/opencl/source/command_stream/CMakeLists.txt +++ b/opencl/source/command_stream/CMakeLists.txt @@ -19,7 +19,7 @@ set(RUNTIME_SRCS_COMMAND_STREAM ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_with_aub_dump.inl ${CMAKE_CURRENT_SOURCE_DIR}/create_command_stream_impl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/create_command_stream_impl.h - ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/command_stream_receiver_simulated_hw.h + ${CMAKE_CURRENT_SOURCE_DIR}/definitions/command_stream_receiver_simulated_hw.h ) if(SUPPORT_XEHP_PLUS) diff --git a/opencl/source/command_stream/aub_command_stream_receiver_hw.h b/opencl/source/command_stream/aub_command_stream_receiver_hw.h index cf13b99121..89691f8836 100644 --- a/opencl/source/command_stream/aub_command_stream_receiver_hw.h +++ b/opencl/source/command_stream/aub_command_stream_receiver_hw.h @@ -14,9 +14,9 @@ #include "shared/source/utilities/spinlock.h" #include "opencl/source/command_stream/aub_command_stream_receiver.h" +#include "opencl/source/command_stream/definitions/command_stream_receiver_simulated_hw.h" #include "aub_mapper.h" -#include "command_stream_receiver_simulated_hw.h" namespace NEO { diff --git a/opencl/source/command_stream/definitions/command_stream_receiver_simulated_hw.h b/opencl/source/command_stream/definitions/command_stream_receiver_simulated_hw.h index 7e872661c4..80511e5557 100644 --- a/opencl/source/command_stream/definitions/command_stream_receiver_simulated_hw.h +++ b/opencl/source/command_stream/definitions/command_stream_receiver_simulated_hw.h @@ -6,12 +6,22 @@ */ #pragma once +#include "shared/source/aub/aub_helper.h" +#include "shared/source/helpers/debug_helpers.h" +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_banks.h" +#include "shared/source/memory_manager/memory_pool.h" #include "shared/source/memory_manager/physical_address_allocator.h" +#include "shared/source/os_interface/os_context.h" #include "opencl/source/command_stream/command_stream_receiver_simulated_common_hw.h" +#include "opencl/source/helpers/hardware_context_controller.h" +#include "aub_mem_dump.h" #include "third_party/aub_stream/headers/allocation_params.h" +#include "third_party/aub_stream/headers/aub_manager.h" +#include "third_party/aub_stream/headers/hardware_context.h" namespace NEO { class GraphicsAllocation; @@ -21,11 +31,47 @@ class CommandStreamReceiverSimulatedHw : public CommandStreamReceiverSimulatedCo using CommandStreamReceiverSimulatedCommonHw::CommandStreamReceiverSimulatedCommonHw; using CommandStreamReceiverSimulatedCommonHw::osContext; using CommandStreamReceiverSimulatedCommonHw::getDeviceIndex; + using CommandStreamReceiverSimulatedCommonHw::aubManager; + using CommandStreamReceiverSimulatedCommonHw::hardwareContextController; + using CommandStreamReceiverSimulatedCommonHw::writeMemory; public: uint32_t getMemoryBank(GraphicsAllocation *allocation) const { - return MemoryBanks::getBank(getDeviceIndex()); + if (aubManager) { + return static_cast(getMemoryBanksBitfield(allocation).to_ulong()); + } + + uint32_t deviceIndexChosen = allocation->storageInfo.memoryBanks.any() + ? getDeviceIndexFromStorageInfo(allocation->storageInfo) + : getDeviceIndex(); + + if (allocation->getMemoryPool() == MemoryPool::LocalMemory) { + return MemoryBanks::getBankForLocalMemory(deviceIndexChosen); + } + return MemoryBanks::getBank(deviceIndexChosen); } + + static uint32_t getDeviceIndexFromStorageInfo(StorageInfo storageInfo) { + uint32_t deviceIndex = 0; + while (!storageInfo.memoryBanks.test(0)) { + storageInfo.memoryBanks >>= 1; + deviceIndex++; + } + return deviceIndex; + } + + DeviceBitfield getMemoryBanksBitfield(GraphicsAllocation *allocation) const { + if (allocation->getMemoryPool() == MemoryPool::LocalMemory) { + if (allocation->storageInfo.memoryBanks.any()) { + if (allocation->storageInfo.cloningOfPageTables || this->isMultiOsContextCapable()) { + return allocation->storageInfo.memoryBanks; + } + } + return this->osContext->getDeviceBitfield(); + } + return {}; + } + int getAddressSpace(int hint) { bool traceLocalAllowed = false; switch (hint) { @@ -46,7 +92,9 @@ class CommandStreamReceiverSimulatedHw : public CommandStreamReceiverSimulatedCo return AubMemDump::AddressSpaceValues::TraceNonlocal; } PhysicalAddressAllocator *createPhysicalAddressAllocator(const HardwareInfo *hwInfo) { - return new PhysicalAddressAllocator(); + const auto bankSize = AubHelper::getMemBankSize(hwInfo); + const auto devicesCount = HwHelper::getSubDevicesCount(hwInfo); + return new PhysicalAddressAllocatorHw(bankSize, devicesCount); } void writeMemoryWithAubManager(GraphicsAllocation &graphicsAllocation) override { uint64_t gpuAddress; @@ -64,20 +112,44 @@ class CommandStreamReceiverSimulatedHw : public CommandStreamReceiverSimulatedCo allocationParams.additionalParams.compressionEnabled = gmm ? gmm->isCompressionEnabled : false; - this->aubManager->writeMemory2(allocationParams); + if (graphicsAllocation.storageInfo.cloningOfPageTables || !graphicsAllocation.isAllocatedInLocalMemoryPool()) { + aubManager->writeMemory2(allocationParams); + } else { + hardwareContextController->writeMemory(allocationParams); + } } void setAubWritable(bool writable, GraphicsAllocation &graphicsAllocation) override { - graphicsAllocation.setAubWritable(writable, getMemoryBank(&graphicsAllocation)); + auto bank = getMemoryBank(&graphicsAllocation); + if (bank == 0u || graphicsAllocation.storageInfo.cloningOfPageTables) { + bank = GraphicsAllocation::defaultBank; + } + + graphicsAllocation.setAubWritable(writable, bank); } + bool isAubWritable(GraphicsAllocation &graphicsAllocation) const override { - return graphicsAllocation.isAubWritable(getMemoryBank(&graphicsAllocation)); + auto bank = getMemoryBank(&graphicsAllocation); + if (bank == 0u || graphicsAllocation.storageInfo.cloningOfPageTables) { + bank = GraphicsAllocation::defaultBank; + } + return graphicsAllocation.isAubWritable(bank); } + void setTbxWritable(bool writable, GraphicsAllocation &graphicsAllocation) override { - graphicsAllocation.setTbxWritable(writable, getMemoryBank(&graphicsAllocation)); + auto bank = getMemoryBank(&graphicsAllocation); + if (bank == 0u || graphicsAllocation.storageInfo.cloningOfPageTables) { + bank = GraphicsAllocation::defaultBank; + } + graphicsAllocation.setTbxWritable(writable, bank); } + bool isTbxWritable(GraphicsAllocation &graphicsAllocation) const override { - return graphicsAllocation.isTbxWritable(getMemoryBank(&graphicsAllocation)); + auto bank = getMemoryBank(&graphicsAllocation); + if (bank == 0u || graphicsAllocation.storageInfo.cloningOfPageTables) { + bank = GraphicsAllocation::defaultBank; + } + return graphicsAllocation.isTbxWritable(bank); } }; } // namespace NEO diff --git a/opencl/test/unit_test/command_stream/CMakeLists.txt b/opencl/test/unit_test/command_stream/CMakeLists.txt index 828e997773..dcd284ee4d 100644 --- a/opencl/test/unit_test/command_stream/CMakeLists.txt +++ b/opencl/test/unit_test/command_stream/CMakeLists.txt @@ -25,6 +25,7 @@ set(IGDRCL_SRCS_tests_command_stream ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_4_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_gmock_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_with_aub_dump_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/create_command_stream_receiver_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_devices_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/experimental_command_buffer_tests.cpp diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_simulated_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_simulated_tests.cpp new file mode 100644 index 0000000000..ca690eab80 --- /dev/null +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_simulated_tests.cpp @@ -0,0 +1,544 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/array_count.h" +#include "shared/source/memory_manager/memory_pool.h" +#include "shared/source/memory_manager/os_agnostic_memory_manager.h" +#include "shared/source/os_interface/os_context.h" +#include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/mocks/mock_aub_manager.h" +#include "shared/test/common/mocks/mock_graphics_allocation.h" + +#include "opencl/source/command_stream/definitions/command_stream_receiver_simulated_hw.h" +#include "opencl/source/helpers/hardware_context_controller.h" +#include "opencl/test/unit_test/helpers/hw_helper_tests.h" +#include "opencl/test/unit_test/mocks/mock_csr_simulated_common_hw.h" +#include "opencl/test/unit_test/mocks/mock_gmm.h" +#include "opencl/test/unit_test/mocks/mock_os_context.h" +#include "test.h" + +#include "aub_mem_dump.h" + +#include +#include +using namespace NEO; + +using CommandStreamSimulatedTests = HwHelperTest; + +HWTEST_F(CommandStreamSimulatedTests, givenLocalMemoryAndAllocationWithStorageInfoNonZeroWhenMemoryBankIsQueriedThenBankForAllocationDeviceIsReturned) { + ExecutionEnvironment executionEnvironment; + hardwareInfo.featureTable.ftrLocalMemory = true; + executionEnvironment.prepareRootDeviceEnvironments(1); + executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(&hardwareInfo); + executionEnvironment.initializeMemoryManager(); + + MemoryAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, reinterpret_cast(0x1000), 0x1000u, + MemoryConstants::pageSize, 0, MemoryPool::LocalMemory, false, false, mockMaxOsContextCount); + allocation.storageInfo.memoryBanks = 0x2u; + + auto csr = std::make_unique>(executionEnvironment, 0, 1); + + if (csr->localMemoryEnabled) { + auto bank = csr->getMemoryBank(&allocation); + EXPECT_EQ(MemoryBanks::getBankForLocalMemory(1), bank); + } +} + +HWTEST_F(CommandStreamSimulatedTests, givenLocalMemoryAndNonLocalMemoryAllocationWithStorageInfoNonZeroWhenMemoryBankIsQueriedThenMainBankIsReturned) { + ExecutionEnvironment executionEnvironment; + hardwareInfo.featureTable.ftrLocalMemory = true; + executionEnvironment.prepareRootDeviceEnvironments(1); + executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(&hardwareInfo); + + executionEnvironment.initializeMemoryManager(); + MemoryAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, reinterpret_cast(0x1000), 0x1000u, + MemoryConstants::pageSize, 0, MemoryPool::System4KBPages, false, false, mockMaxOsContextCount); + allocation.storageInfo.memoryBanks = 0x2u; + + auto csr = std::make_unique>(executionEnvironment, 0, 1); + auto bank = csr->getMemoryBank(&allocation); + EXPECT_EQ(MemoryBanks::MainBank, bank); +} + +HWTEST_F(CommandStreamSimulatedTests, givenLocalMemoryAndAllocationWithStorageInfoZeroWhenMemoryBankIsQueriedThenBankForCsrIsReturned) { + ExecutionEnvironment executionEnvironment; + hardwareInfo.featureTable.ftrLocalMemory = true; + executionEnvironment.prepareRootDeviceEnvironments(1); + executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(&hardwareInfo); + executionEnvironment.initializeMemoryManager(); + + MemoryAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, reinterpret_cast(0x1000), 0x1000u, + MemoryConstants::pageSize, 0, MemoryPool::LocalMemory, false, false, mockMaxOsContextCount); + allocation.storageInfo.memoryBanks = 0x0u; + + DeviceBitfield deviceBitfield(0b100); + auto csr = std::make_unique>(executionEnvironment, 0, deviceBitfield); + auto osContext = executionEnvironment.memoryManager->createAndRegisterOsContext(csr.get(), EngineTypeUsage{aub_stream::EngineType::ENGINE_RCS, EngineUsage::Regular}, + deviceBitfield, + PreemptionMode::Disabled, + false); + csr->setupContext(*osContext); + auto bank = csr->getMemoryBank(&allocation); + EXPECT_EQ(MemoryBanks::getBankForLocalMemory(2), bank); +} + +HWTEST_F(CommandStreamSimulatedTests, givenLocalMemoryAndNonLocalMemoryAllocationWithStorageInfoNonZeroWhenMemoryBanksBitfieldIsQueriedThenBanksBitfieldForSystemMemoryIsReturned) { + ExecutionEnvironment executionEnvironment; + hardwareInfo.featureTable.ftrLocalMemory = true; + executionEnvironment.prepareRootDeviceEnvironments(1); + executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(&hardwareInfo); + executionEnvironment.initializeMemoryManager(); + + MemoryAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, reinterpret_cast(0x1000), 0x1000u, + MemoryConstants::pageSize, 0, MemoryPool::System64KBPages, false, false, mockMaxOsContextCount); + allocation.storageInfo.memoryBanks = 0x3u; + + DeviceBitfield deviceBitfield(1); + auto csr = std::make_unique>(executionEnvironment, 0, deviceBitfield); + auto osContext = executionEnvironment.memoryManager->createAndRegisterOsContext(csr.get(), EngineTypeUsage{aub_stream::EngineType::ENGINE_RCS, EngineUsage::Regular}, + deviceBitfield, + PreemptionMode::Disabled, + false); + csr->setupContext(*osContext); + auto banksBitfield = csr->getMemoryBanksBitfield(&allocation); + EXPECT_TRUE(banksBitfield.none()); +} + +HWTEST_F(CommandStreamSimulatedTests, givenLocalMemoryNoncloneableAllocationWithManyBanksWhenMemoryBanksBitfieldIsQueriedThenSingleMemoryBankIsReturned) { + ExecutionEnvironment executionEnvironment; + hardwareInfo.featureTable.ftrLocalMemory = true; + executionEnvironment.prepareRootDeviceEnvironments(1); + executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(&hardwareInfo); + executionEnvironment.initializeMemoryManager(); + + MemoryAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, reinterpret_cast(0x1000), 0x1000u, + MemoryConstants::pageSize, 0, MemoryPool::LocalMemory, false, false, mockMaxOsContextCount); + allocation.storageInfo.memoryBanks = 0x3u; + allocation.storageInfo.cloningOfPageTables = false; + + DeviceBitfield deviceBitfield(0x1u); + auto csr = std::make_unique>(executionEnvironment, 0, deviceBitfield); + auto osContext = executionEnvironment.memoryManager->createAndRegisterOsContext(csr.get(), EngineTypeUsage{aub_stream::EngineType::ENGINE_RCS, EngineUsage::Regular}, + deviceBitfield, + PreemptionMode::Disabled, + false); + csr->setupContext(*osContext); + EXPECT_FALSE(csr->isMultiOsContextCapable()); + + if (csr->localMemoryEnabled) { + auto banksBitfield = csr->getMemoryBanksBitfield(&allocation); + EXPECT_EQ(0x1lu, banksBitfield.to_ulong()); + } +} + +HWTEST_F(CommandStreamSimulatedTests, givenLocalMemoryCloneableAllocationWithManyBanksWhenMemoryBanksBitfieldIsQueriedThenAllMemoryBanksAreReturned) { + ExecutionEnvironment executionEnvironment; + hardwareInfo.featureTable.ftrLocalMemory = true; + executionEnvironment.prepareRootDeviceEnvironments(1); + executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(&hardwareInfo); + executionEnvironment.initializeMemoryManager(); + + MemoryAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, reinterpret_cast(0x1000), 0x1000u, + MemoryConstants::pageSize, 0, MemoryPool::LocalMemory, false, false, mockMaxOsContextCount); + allocation.storageInfo.memoryBanks = 0x3u; + allocation.storageInfo.cloningOfPageTables = true; + + DeviceBitfield deviceBitfield(1); + auto csr = std::make_unique>(executionEnvironment, 0, deviceBitfield); + EXPECT_FALSE(csr->isMultiOsContextCapable()); + auto osContext = executionEnvironment.memoryManager->createAndRegisterOsContext(csr.get(), EngineTypeUsage{aub_stream::EngineType::ENGINE_RCS, EngineUsage::Regular}, + deviceBitfield, + PreemptionMode::Disabled, + false); + csr->setupContext(*osContext); + + if (csr->localMemoryEnabled) { + auto banksBitfield = csr->getMemoryBanksBitfield(&allocation); + EXPECT_EQ(0x3lu, banksBitfield.to_ulong()); + } +} + +HWTEST_F(CommandStreamSimulatedTests, givenLocalMemoryNoncloneableAllocationWithManyBanksWhenMemoryBanksBitfieldIsQueriedOnSpecialCsrThenAllMemoryBanksAreReturned) { + ExecutionEnvironment executionEnvironment; + hardwareInfo.featureTable.ftrLocalMemory = true; + executionEnvironment.prepareRootDeviceEnvironments(1); + executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(&hardwareInfo); + executionEnvironment.initializeMemoryManager(); + + MemoryAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, reinterpret_cast(0x1000), 0x1000u, + MemoryConstants::pageSize, 0, MemoryPool::LocalMemory, false, false, mockMaxOsContextCount); + allocation.storageInfo.memoryBanks = 0x3u; + allocation.storageInfo.cloningOfPageTables = false; + + DeviceBitfield deviceBitfield(0b11); + MockSimulatedCsrHw csr(executionEnvironment, 0, deviceBitfield); + csr.multiOsContextCapable = true; + EXPECT_TRUE(csr.isMultiOsContextCapable()); + auto osContext = executionEnvironment.memoryManager->createAndRegisterOsContext(&csr, EngineTypeUsage{aub_stream::EngineType::ENGINE_RCS, EngineUsage::Regular}, + deviceBitfield, + PreemptionMode::Disabled, + false); + csr.setupContext(*osContext); + + if (csr.localMemoryEnabled) { + auto banksBitfield = csr.getMemoryBanksBitfield(&allocation); + EXPECT_EQ(0x3lu, banksBitfield.to_ulong()); + } +} + +HWTEST_F(CommandStreamSimulatedTests, givenLocalMemoryAndAllocationWithStorageInfoZeroWhenMemoryBanksBitfieldIsQueriedThenBanksBitfieldForCsrDeviceIndexIsReturned) { + ExecutionEnvironment executionEnvironment; + hardwareInfo.featureTable.ftrLocalMemory = true; + executionEnvironment.prepareRootDeviceEnvironments(1); + executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(&hardwareInfo); + executionEnvironment.initializeMemoryManager(); + + MemoryAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, nullptr, reinterpret_cast(0x1000), 0x1000u, + MemoryConstants::pageSize, 0, MemoryPool::LocalMemory, false, false, mockMaxOsContextCount); + allocation.storageInfo.memoryBanks = 0x0u; + + DeviceBitfield deviceBitfield(0b100); + auto csr = std::make_unique>(executionEnvironment, 0, deviceBitfield); + auto deviceIndex = 2u; + + auto osContext = executionEnvironment.memoryManager->createAndRegisterOsContext(csr.get(), EngineTypeUsage{aub_stream::EngineType::ENGINE_RCS, EngineUsage::Regular}, + deviceBitfield, PreemptionMode::Disabled, + false); + csr->setupContext(*osContext); + auto banksBitfield = csr->getMemoryBanksBitfield(&allocation); + EXPECT_EQ(1u, banksBitfield.count()); + EXPECT_TRUE(banksBitfield.test(deviceIndex)); +} + +HWTEST_F(CommandStreamSimulatedTests, givenLocalMemoryWhenSimulatedCsrGetAddressSpaceIsCalledWithDifferentHintsThenCorrectSpaceIsReturned) { + ExecutionEnvironment executionEnvironment; + hardwareInfo.featureTable.ftrLocalMemory = true; + executionEnvironment.prepareRootDeviceEnvironments(1); + executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(&hardwareInfo); + executionEnvironment.initializeMemoryManager(); + + std::array localMemoryHints = {AubMemDump::DataTypeHintValues::TraceLogicalRingContextRcs, + AubMemDump::DataTypeHintValues::TraceLogicalRingContextCcs, + AubMemDump::DataTypeHintValues::TraceLogicalRingContextBcs, + AubMemDump::DataTypeHintValues::TraceLogicalRingContextVcs, + AubMemDump::DataTypeHintValues::TraceLogicalRingContextVecs, + AubMemDump::DataTypeHintValues::TraceCommandBuffer}; + + auto csr = std::make_unique>(executionEnvironment, 0, 1); + + if (csr->localMemoryEnabled) { + for (const uint32_t hint : localMemoryHints) { + EXPECT_EQ(AubMemDump::AddressSpaceValues::TraceLocal, csr->getAddressSpace(hint)); + } + } + std::array nonLocalMemoryHints = {AubMemDump::DataTypeHintValues::TraceNotype}; + + for (const uint32_t hint : nonLocalMemoryHints) { + EXPECT_EQ(AubMemDump::AddressSpaceValues::TraceNonlocal, csr->getAddressSpace(hint)); + } +} + +HWTEST_F(CommandStreamSimulatedTests, givenLocalMemoryDisabledWhenSimulatedCsrGetAddressSpaceIsCalledWithDifferentHintsThenCorrectSpaceIsReturned) { + ExecutionEnvironment executionEnvironment; + hardwareInfo.featureTable.ftrLocalMemory = false; + executionEnvironment.prepareRootDeviceEnvironments(1); + executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(&hardwareInfo); + executionEnvironment.initializeMemoryManager(); + + std::array nonLocalMemoryHints = {AubMemDump::DataTypeHintValues::TraceNotype, + AubMemDump::DataTypeHintValues::TraceLogicalRingContextRcs, + AubMemDump::DataTypeHintValues::TraceLogicalRingContextCcs, + AubMemDump::DataTypeHintValues::TraceLogicalRingContextBcs, + AubMemDump::DataTypeHintValues::TraceLogicalRingContextVcs, + AubMemDump::DataTypeHintValues::TraceLogicalRingContextVecs, + AubMemDump::DataTypeHintValues::TraceCommandBuffer}; + + auto csr = std::make_unique>(executionEnvironment, 0, 1); + + for (const uint32_t hint : nonLocalMemoryHints) { + EXPECT_EQ(AubMemDump::AddressSpaceValues::TraceNonlocal, csr->getAddressSpace(hint)); + } +} + +HWTEST_F(CommandStreamSimulatedTests, givenAUBDumpForceAllToLocalMemoryWhenSimulatedCsrGetAddressSpaceIsCalledWithDifferentHintsThenTraceLocalIsReturned) { + DebugManagerStateRestore debugRestorer; + DebugManager.flags.AUBDumpForceAllToLocalMemory.set(true); + + hardwareInfo.featureTable.ftrLocalMemory = false; + ExecutionEnvironment executionEnvironment; + executionEnvironment.prepareRootDeviceEnvironments(1); + executionEnvironment.rootDeviceEnvironments[0]->setHwInfo(&hardwareInfo); + executionEnvironment.initializeMemoryManager(); + + std::array localMemoryHints = {AubMemDump::DataTypeHintValues::TraceNotype, + AubMemDump::DataTypeHintValues::TraceLogicalRingContextRcs, + AubMemDump::DataTypeHintValues::TraceLogicalRingContextCcs, + AubMemDump::DataTypeHintValues::TraceLogicalRingContextBcs, + AubMemDump::DataTypeHintValues::TraceLogicalRingContextVcs, + AubMemDump::DataTypeHintValues::TraceLogicalRingContextVecs, + AubMemDump::DataTypeHintValues::TraceCommandBuffer}; + + auto csr = std::make_unique>(executionEnvironment, 0, 1); + + if (csr->localMemoryEnabled) { + for (const uint32_t hint : localMemoryHints) { + EXPECT_EQ(AubMemDump::AddressSpaceValues::TraceLocal, csr->getAddressSpace(hint)); + } + } +} + +HWTEST_F(CommandStreamSimulatedTests, givenMultipleBitsInStorageInfoWhenQueryingDeviceIndexThenLowestDeviceIndexIsReturned) { + StorageInfo storageInfo; + + storageInfo.memoryBanks = {1u | (1u << 2u)}; + auto deviceIndex = CommandStreamReceiverSimulatedHw::getDeviceIndexFromStorageInfo(storageInfo); + EXPECT_EQ(0u, deviceIndex); + + storageInfo.memoryBanks = (1u << 2u) | (1u << 3u); + + deviceIndex = CommandStreamReceiverSimulatedHw::getDeviceIndexFromStorageInfo(storageInfo); + EXPECT_EQ(2u, deviceIndex); +} + +HWTEST_F(CommandStreamSimulatedTests, givenSingleBitInStorageInfoWhenQueryingDeviceIndexThenCorrectDeviceIndexIsReturned) { + + StorageInfo storageInfo; + + for (uint32_t i = 0; i < 4u; i++) { + storageInfo.memoryBanks.reset(); + storageInfo.memoryBanks.set(i); + auto deviceIndex = CommandStreamReceiverSimulatedHw::getDeviceIndexFromStorageInfo(storageInfo); + EXPECT_EQ(i, deviceIndex); + } +} + +HWTEST_F(CommandStreamSimulatedTests, givenSimulatedCommandStreamReceiverWhenCloningPageTableIsRequiredThenAubManagerIsUsedForWriteMemory) { + auto mockManager = std::make_unique(); + + auto csr = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + csr->aubManager = mockManager.get(); + MockOsContext osContext(0, 1, EngineTypeUsage{aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::Disabled, false); + csr->setupContext(osContext); + auto mockHardwareContext = static_cast(csr->hardwareContextController->hardwareContexts[0].get()); + + int dummy = 1; + GraphicsAllocation graphicsAllocation{0, GraphicsAllocation::AllocationType::UNKNOWN, + &dummy, 0, 0, sizeof(dummy), MemoryPool::MemoryNull, mockMaxOsContextCount}; + graphicsAllocation.storageInfo.cloningOfPageTables = true; + csr->writeMemoryWithAubManager(graphicsAllocation); + + EXPECT_FALSE(mockHardwareContext->writeMemory2Called); + EXPECT_TRUE(mockManager->writeMemory2Called); +} + +HWTEST_F(CommandStreamSimulatedTests, givenCompressedAllocationWhenCloningPageTableIsRequiredThenAubManagerIsUsedForWriteMemory) { + auto mockManager = std::make_unique(); + mockManager->storeAllocationParams = true; + + auto csr = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + csr->aubManager = mockManager.get(); + MockOsContext osContext(0, 1, EngineTypeUsage{aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::Disabled, false); + csr->setupContext(osContext); + auto mockHardwareContext = static_cast(csr->hardwareContextController->hardwareContexts[0].get()); + + MockGmm gmm(pDevice->executionEnvironment->rootDeviceEnvironments[0]->getGmmClientContext(), nullptr, 1, 0, false); + gmm.isCompressionEnabled = true; + + int dummy = 1; + GraphicsAllocation graphicsAllocation{0, GraphicsAllocation::AllocationType::UNKNOWN, + &dummy, 0, 0, sizeof(dummy), MemoryPool::MemoryNull, mockMaxOsContextCount}; + graphicsAllocation.storageInfo.cloningOfPageTables = true; + + graphicsAllocation.setDefaultGmm(&gmm); + + csr->writeMemoryWithAubManager(graphicsAllocation); + + EXPECT_FALSE(mockHardwareContext->writeMemory2Called); + EXPECT_TRUE(mockManager->writeMemory2Called); + + EXPECT_EQ(1u, mockManager->storedAllocationParams.size()); + EXPECT_TRUE(mockManager->storedAllocationParams[0].additionalParams.compressionEnabled); +} + +HWTEST_F(CommandStreamSimulatedTests, givenTileInstancedAllocationWhenWriteMemoryWithAubManagerThenEachHardwareContextGetsDifferentMemoryBank) { + auto mockManager = std::make_unique(); + + auto csr = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + csr->aubManager = mockManager.get(); + MockOsContext osContext(0, 0b11, EngineTypeUsage{aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::Disabled, false); + csr->hardwareContextController = std::make_unique(*mockManager, osContext, 0); + auto firstMockHardwareContext = static_cast(csr->hardwareContextController->hardwareContexts[0].get()); + auto secondMockHardwareContext = static_cast(csr->hardwareContextController->hardwareContexts[1].get()); + csr->multiOsContextCapable = true; + + int dummy = 1; + GraphicsAllocation graphicsAllocation{0, GraphicsAllocation::AllocationType::UNKNOWN, + &dummy, 0, 0, sizeof(dummy), MemoryPool::LocalMemory, mockMaxOsContextCount}; + graphicsAllocation.storageInfo.cloningOfPageTables = false; + graphicsAllocation.storageInfo.tileInstanced = true; + graphicsAllocation.storageInfo.memoryBanks = 0b11u; + csr->writeMemoryWithAubManager(graphicsAllocation); + + EXPECT_TRUE(firstMockHardwareContext->writeMemory2Called); + EXPECT_EQ(0b01u, firstMockHardwareContext->memoryBanksPassed); + EXPECT_TRUE(secondMockHardwareContext->writeMemory2Called); + EXPECT_EQ(0b10u, secondMockHardwareContext->memoryBanksPassed); + EXPECT_FALSE(mockManager->writeMemory2Called); +} + +HWTEST_F(CommandStreamSimulatedTests, givenCompressedTileInstancedAllocationWhenWriteMemoryWithAubManagerThenEachHardwareContextGetsCompressionInfo) { + auto mockManager = std::make_unique(); + + auto csr = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + csr->aubManager = mockManager.get(); + MockOsContext osContext(0, 0b11, EngineTypeUsage{aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::Disabled, false); + csr->hardwareContextController = std::make_unique(*mockManager, osContext, 0); + auto firstMockHardwareContext = static_cast(csr->hardwareContextController->hardwareContexts[0].get()); + firstMockHardwareContext->storeAllocationParams = true; + auto secondMockHardwareContext = static_cast(csr->hardwareContextController->hardwareContexts[1].get()); + secondMockHardwareContext->storeAllocationParams = true; + + csr->multiOsContextCapable = true; + + MockGmm gmm(pDevice->executionEnvironment->rootDeviceEnvironments[0]->getGmmClientContext(), nullptr, 1, 0, false); + gmm.isCompressionEnabled = true; + + int dummy = 1; + GraphicsAllocation graphicsAllocation{0, GraphicsAllocation::AllocationType::UNKNOWN, + &dummy, 0, 0, sizeof(dummy), MemoryPool::LocalMemory, mockMaxOsContextCount}; + graphicsAllocation.storageInfo.cloningOfPageTables = false; + graphicsAllocation.storageInfo.tileInstanced = true; + graphicsAllocation.storageInfo.memoryBanks = 0b11u; + + graphicsAllocation.setDefaultGmm(&gmm); + + csr->writeMemoryWithAubManager(graphicsAllocation); + + EXPECT_TRUE(firstMockHardwareContext->writeMemory2Called); + EXPECT_EQ(1u, firstMockHardwareContext->storedAllocationParams.size()); + EXPECT_TRUE(firstMockHardwareContext->storedAllocationParams[0].additionalParams.compressionEnabled); + + EXPECT_TRUE(secondMockHardwareContext->writeMemory2Called); + EXPECT_EQ(1u, secondMockHardwareContext->storedAllocationParams.size()); + EXPECT_TRUE(secondMockHardwareContext->storedAllocationParams[0].additionalParams.compressionEnabled); +} + +HWTEST_F(CommandStreamSimulatedTests, givenTileInstancedAllocationWithMissingMemoryBankWhenWriteMemoryWithAubManagerThenAbortIsCalled) { + auto mockManager = std::make_unique(); + + auto csr = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + csr->aubManager = mockManager.get(); + MockOsContext osContext(0, 0b11, EngineTypeUsage{aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::Disabled, false); + csr->hardwareContextController = std::make_unique(*mockManager, osContext, 0); + auto firstMockHardwareContext = static_cast(csr->hardwareContextController->hardwareContexts[0].get()); + auto secondMockHardwareContext = static_cast(csr->hardwareContextController->hardwareContexts[1].get()); + csr->multiOsContextCapable = true; + + int dummy = 1; + GraphicsAllocation graphicsAllocation{0, GraphicsAllocation::AllocationType::UNKNOWN, + &dummy, 0, 0, sizeof(dummy), MemoryPool::LocalMemory, mockMaxOsContextCount}; + graphicsAllocation.storageInfo.cloningOfPageTables = false; + graphicsAllocation.storageInfo.tileInstanced = true; + graphicsAllocation.storageInfo.memoryBanks = 2u; + EXPECT_THROW(csr->writeMemoryWithAubManager(graphicsAllocation), std::exception); + EXPECT_FALSE(firstMockHardwareContext->writeMemory2Called); + EXPECT_FALSE(secondMockHardwareContext->writeMemory2Called); +} + +HWTEST_F(CommandStreamSimulatedTests, givenCommandBufferAllocationWhenWriteMemoryCalledThenHintIsPassed) { + auto mockManager = std::make_unique(); + auto csr = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + csr->aubManager = mockManager.get(); + + int dummy = 1; + GraphicsAllocation graphicsAllocation{0, GraphicsAllocation::AllocationType::COMMAND_BUFFER, + &dummy, 0, 0, sizeof(dummy), MemoryPool::MemoryNull, mockMaxOsContextCount}; + graphicsAllocation.storageInfo.cloningOfPageTables = true; + csr->writeMemoryWithAubManager(graphicsAllocation); + + EXPECT_EQ(AubMemDump::DataTypeHintValues::TraceBatchBuffer, mockManager->hintToWriteMemory); + EXPECT_TRUE(mockManager->writeMemory2Called); +} + +HWTEST_F(CommandStreamSimulatedTests, givenSpecificMemoryPoolAllocationWhenWriteMemoryByAubManagerOrHardwareContextIsCalledThenCorrectPageSizeIsPassed) { + auto mockManager = std::make_unique(); + auto csr = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + csr->aubManager = mockManager.get(); + + MockOsContext osContext(0, 0b1, EngineTypeUsage{aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::Disabled, false); + csr->hardwareContextController = std::make_unique(*mockManager, osContext, 0); + csr->setupContext(osContext); + auto mockHardwareContext = static_cast(csr->hardwareContextController->hardwareContexts[0].get()); + + int dummy = 1; + + MemoryPool::Type poolsWith4kPages[] = { + MemoryPool::System4KBPages, + MemoryPool::System4KBPagesWith32BitGpuAddressing, + MemoryPool::SystemCpuInaccessible}; + + for (size_t i = 0; i < arrayCount(poolsWith4kPages); i++) { + + mockManager->writeMemoryPageSizePassed = 0; + mockManager->writeMemory2Called = false; + + mockHardwareContext->writeMemoryPageSizePassed = 0; + mockHardwareContext->writeMemory2Called = false; + + GraphicsAllocation graphicsAllocation{0, GraphicsAllocation::AllocationType::COMMAND_BUFFER, + &dummy, 0, 0, sizeof(dummy), poolsWith4kPages[i], mockMaxOsContextCount}; + graphicsAllocation.storageInfo.cloningOfPageTables = true; + csr->writeMemoryWithAubManager(graphicsAllocation); + + EXPECT_TRUE(mockManager->writeMemory2Called); + EXPECT_EQ(MemoryConstants::pageSize, mockManager->writeMemoryPageSizePassed); + + graphicsAllocation.storageInfo.cloningOfPageTables = false; + csr->writeMemoryWithAubManager(graphicsAllocation); + + if (graphicsAllocation.isAllocatedInLocalMemoryPool()) { + EXPECT_TRUE(mockHardwareContext->writeMemory2Called); + EXPECT_EQ(MemoryConstants::pageSize, mockHardwareContext->writeMemoryPageSizePassed); + } else { + EXPECT_TRUE(mockManager->writeMemory2Called); + EXPECT_EQ(MemoryConstants::pageSize, mockManager->writeMemoryPageSizePassed); + } + } + + MemoryPool::Type poolsWith64kPages[] = { + MemoryPool::System64KBPages, + MemoryPool::System64KBPagesWith32BitGpuAddressing, + MemoryPool::LocalMemory}; + + for (size_t i = 0; i < arrayCount(poolsWith64kPages); i++) { + + mockManager->writeMemoryPageSizePassed = 0; + mockManager->writeMemory2Called = false; + + mockHardwareContext->writeMemoryPageSizePassed = 0; + mockHardwareContext->writeMemory2Called = false; + + GraphicsAllocation graphicsAllocation{0, GraphicsAllocation::AllocationType::COMMAND_BUFFER, + &dummy, 0, 0, sizeof(dummy), poolsWith64kPages[i], mockMaxOsContextCount}; + graphicsAllocation.storageInfo.cloningOfPageTables = true; + csr->writeMemoryWithAubManager(graphicsAllocation); + + EXPECT_TRUE(mockManager->writeMemory2Called); + EXPECT_EQ(MemoryConstants::pageSize64k, mockManager->writeMemoryPageSizePassed); + + graphicsAllocation.storageInfo.cloningOfPageTables = false; + csr->writeMemoryWithAubManager(graphicsAllocation); + + if (graphicsAllocation.isAllocatedInLocalMemoryPool()) { + EXPECT_TRUE(mockHardwareContext->writeMemory2Called); + EXPECT_EQ(MemoryConstants::pageSize64k, mockHardwareContext->writeMemoryPageSizePassed); + } else { + EXPECT_TRUE(mockManager->writeMemory2Called); + EXPECT_EQ(MemoryConstants::pageSize64k, mockManager->writeMemoryPageSizePassed); + } + } +} diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 00db7f0cd2..7852a0906f 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -25,6 +25,7 @@ #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test_checks_shared.h" +#include "opencl/source/command_stream/definitions/command_stream_receiver_simulated_hw.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" @@ -43,7 +44,6 @@ #include "opencl/test/unit_test/mocks/mock_program.h" #include "test.h" -#include "command_stream_receiver_simulated_hw.h" #include "gmock/gmock.h" using namespace NEO; diff --git a/opencl/test/unit_test/gen12lp/command_stream_receiver_simulated_common_hw_tests_gen12lp.inl b/opencl/test/unit_test/gen12lp/command_stream_receiver_simulated_common_hw_tests_gen12lp.inl index dd2034e94e..ab57267b3f 100644 --- a/opencl/test/unit_test/gen12lp/command_stream_receiver_simulated_common_hw_tests_gen12lp.inl +++ b/opencl/test/unit_test/gen12lp/command_stream_receiver_simulated_common_hw_tests_gen12lp.inl @@ -7,12 +7,11 @@ #include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "opencl/source/command_stream/definitions/command_stream_receiver_simulated_hw.h" #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_aub_stream.h" #include "test.h" -#include "command_stream_receiver_simulated_hw.h" - using namespace NEO; using Gen12LPCommandStreamReceiverSimulatedCommonHwTests = Test; diff --git a/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp b/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp index 8424295d62..1f1f9d49da 100644 --- a/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp +++ b/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp @@ -1235,3 +1235,33 @@ HWTEST_F(UnfiedSharedMemoryHWTest, givenSharedUsmAllocationWhenReadBufferThenCpu delete buffer; clMemFreeINTEL(&mockContext, sharedMemory); } + +TEST(UnifiedMemoryManagerTest, givenEnableStatelessCompressionWhenDeviceAllocationIsCreatedThenAllocationTypeIsBufferCompressed) { + DebugManagerStateRestore restore; + + cl_int retVal = CL_SUCCESS; + MockContext mockContext; + + auto device = mockContext.getDevice(0u); + auto allocationsManager = mockContext.getSVMAllocsManager(); + + for (auto enable : {false, true}) { + DebugManager.flags.EnableStatelessCompression.set(enable); + + auto deviceMemAllocPtr = clDeviceMemAllocINTEL(&mockContext, device, nullptr, 2048, 0, &retVal); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_NE(nullptr, deviceMemAllocPtr); + + auto deviceMemAlloc = allocationsManager->getSVMAllocs()->get(deviceMemAllocPtr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex()); + EXPECT_NE(nullptr, deviceMemAlloc); + + if (enable) { + EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED, deviceMemAlloc->getAllocationType()); + } else { + EXPECT_EQ(GraphicsAllocation::AllocationType::BUFFER, deviceMemAlloc->getAllocationType()); + } + + retVal = clMemFreeINTEL(&mockContext, deviceMemAllocPtr); + EXPECT_EQ(CL_SUCCESS, retVal); + } +} diff --git a/opencl/test/unit_test/mocks/CMakeLists.txt b/opencl/test/unit_test/mocks/CMakeLists.txt index 86e3cab0fa..901df38294 100644 --- a/opencl/test/unit_test/mocks/CMakeLists.txt +++ b/opencl/test/unit_test/mocks/CMakeLists.txt @@ -25,6 +25,7 @@ set(IGDRCL_SRCS_tests_mocks ${CMAKE_CURRENT_SOURCE_DIR}/mock_context.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_context.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_csr.h + ${CMAKE_CURRENT_SOURCE_DIR}/mock_csr_simulated_common_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_device_queue.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_event.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_execution_environment.h diff --git a/opencl/test/unit_test/mocks/mock_csr_simulated_common_hw.h b/opencl/test/unit_test/mocks/mock_csr_simulated_common_hw.h new file mode 100644 index 0000000000..f87f4a6f58 --- /dev/null +++ b/opencl/test/unit_test/mocks/mock_csr_simulated_common_hw.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "opencl/source/command_stream/definitions/command_stream_receiver_simulated_hw.h" + +namespace NEO { + +template +class MockSimulatedCsrHw : public CommandStreamReceiverSimulatedHw { + public: + using CommandStreamReceiverSimulatedHw::CommandStreamReceiverSimulatedHw; + using CommandStreamReceiverSimulatedHw::localMemoryEnabled; + using CommandStreamReceiverSimulatedHw::aubManager; + using CommandStreamReceiverSimulatedHw::hardwareContextController; + using CommandStreamReceiverSimulatedHw::writeMemory; + void writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) override { + } + void pollForCompletion() override { + } + bool writeMemory(GraphicsAllocation &gfxAllocation) override { + return true; + } + void writeMemoryWithAubManager(GraphicsAllocation &graphicsAllocation) override { + CommandStreamReceiverSimulatedHw::writeMemoryWithAubManager(graphicsAllocation); + } + void writeMMIO(uint32_t offset, uint32_t value) override {} + bool isMultiOsContextCapable() const override { + return multiOsContextCapable; + } + void dumpAllocation(GraphicsAllocation &graphicsAllocation) override {} + bool multiOsContextCapable = false; +}; + +} // namespace NEO diff --git a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp index a9e0654747..0e2aac7331 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp @@ -882,19 +882,18 @@ TEST_F(DrmMemoryManagerLocalMemoryTest, givenAllocationWithKernelIsaWhenAllocati EXPECT_EQ(MemoryManager::AllocationStatus::Success, status); EXPECT_EQ(MemoryPool::LocalMemory, allocation->getMemoryPool()); EXPECT_NE(0u, allocation->getGpuAddress()); - EXPECT_EQ(EngineLimits::maxHandleCount, allocation->getNumGmms()); + EXPECT_EQ(1u, allocation->getNumGmms()); auto drmAllocation = static_cast(allocation); auto &bos = drmAllocation->getBOs(); auto boAddress = drmAllocation->getGpuAddress(); - for (auto handleId = 0u; handleId < EngineLimits::maxHandleCount; handleId++) { - auto bo = bos[handleId]; - ASSERT_NE(nullptr, bo); - auto boSize = allocation->getGmm(handleId)->gmmResourceInfo->getSizeAllocation(); - EXPECT_EQ(boAddress, bo->peekAddress()); - EXPECT_EQ(boSize, bo->peekSize()); - EXPECT_EQ(boSize, 3 * MemoryConstants::pageSize64k); - } + + auto bo = bos[0]; + ASSERT_NE(nullptr, bo); + auto boSize = allocation->getGmm(0)->gmmResourceInfo->getSizeAllocation(); + EXPECT_EQ(boAddress, bo->peekAddress()); + EXPECT_EQ(boSize, bo->peekSize()); + EXPECT_EQ(boSize, 3 * MemoryConstants::pageSize64k); memoryManager->freeGraphicsMemory(allocation); } diff --git a/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp b/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp index c27d4a9051..7b7e557732 100644 --- a/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp +++ b/opencl/test/unit_test/os_interface/windows/wddm_memory_manager_tests.cpp @@ -2019,7 +2019,7 @@ TEST_F(WddmMemoryManagerSimpleTest, whenDestroyingAllocationWithReservedGpuVirtu } TEST_F(WddmMemoryManagerSimpleTest, givenAllocationWithReservedGpuVirtualAddressWhenMapCallFailsDuringCreateWddmAllocationThenReleasePreferredAddress) { - MockWddmAllocation allocation; + MockWddmAllocation allocation(1); allocation.setAllocationType(GraphicsAllocation::AllocationType::KERNEL_ISA); uint64_t gpuAddress = 0x123; uint64_t sizeForFree = 0x1234; diff --git a/shared/source/command_stream/tbx_command_stream_receiver_hw.h b/shared/source/command_stream/tbx_command_stream_receiver_hw.h index 7d8ff2b43d..2108a42434 100644 --- a/shared/source/command_stream/tbx_command_stream_receiver_hw.h +++ b/shared/source/command_stream/tbx_command_stream_receiver_hw.h @@ -11,8 +11,9 @@ #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/memory_manager/page_table.h" +#include "opencl/source/command_stream/definitions/command_stream_receiver_simulated_hw.h" + #include "aub_mapper.h" -#include "command_stream_receiver_simulated_hw.h" #include diff --git a/shared/source/memory_manager/CMakeLists.txt b/shared/source/memory_manager/CMakeLists.txt index 87cab046a6..79a9ce3814 100644 --- a/shared/source/memory_manager/CMakeLists.txt +++ b/shared/source/memory_manager/CMakeLists.txt @@ -18,7 +18,7 @@ set(NEO_CORE_MEMORY_MANAGER ${CMAKE_CURRENT_SOURCE_DIR}/deferrable_deletion.h ${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter.h - ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/engine_limits.h + ${CMAKE_CURRENT_SOURCE_DIR}/definitions/engine_limits.h ${CMAKE_CURRENT_SOURCE_DIR}/definitions/storage_info.cpp ${CMAKE_CURRENT_SOURCE_DIR}/definitions/storage_info.h ${CMAKE_CURRENT_SOURCE_DIR}/eviction_status.h @@ -27,7 +27,6 @@ set(NEO_CORE_MEMORY_MANAGER ${CMAKE_CURRENT_SOURCE_DIR}/gfx_partition.h ${CMAKE_CURRENT_SOURCE_DIR}/graphics_allocation.cpp ${CMAKE_CURRENT_SOURCE_DIR}/graphics_allocation.h - ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/graphics_allocation_extra.cpp ${CMAKE_CURRENT_SOURCE_DIR}/host_ptr_defines.h ${CMAKE_CURRENT_SOURCE_DIR}/host_ptr_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/host_ptr_manager.h @@ -53,7 +52,6 @@ set(NEO_CORE_MEMORY_MANAGER ${CMAKE_CURRENT_SOURCE_DIR}/surface.h ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager.h - ${CMAKE_CURRENT_SOURCE_DIR}/${BRANCH_DIR_SUFFIX}/unified_memory_manager_extra.cpp ${CMAKE_CURRENT_SOURCE_DIR}/page_table.cpp ${CMAKE_CURRENT_SOURCE_DIR}/page_table.h ${CMAKE_CURRENT_SOURCE_DIR}/page_table.inl diff --git a/shared/source/memory_manager/definitions/engine_limits.h b/shared/source/memory_manager/definitions/engine_limits.h index 406000bc3b..93367a97f9 100644 --- a/shared/source/memory_manager/definitions/engine_limits.h +++ b/shared/source/memory_manager/definitions/engine_limits.h @@ -11,7 +11,7 @@ namespace NEO { namespace EngineLimits { -constexpr uint32_t maxHandleCount = 1u; +constexpr uint32_t maxHandleCount = 4u; }; // namespace EngineLimits } // namespace NEO diff --git a/shared/source/memory_manager/graphics_allocation.cpp b/shared/source/memory_manager/graphics_allocation.cpp index b370c9aff4..f810108b85 100644 --- a/shared/source/memory_manager/graphics_allocation.cpp +++ b/shared/source/memory_manager/graphics_allocation.cpp @@ -80,6 +80,24 @@ bool GraphicsAllocation::isAllocationLockable() const { return 0 == gmm->resourceParams.Flags.Info.NotLockable; } +void GraphicsAllocation::setAubWritable(bool writable, uint32_t banks) { + UNRECOVERABLE_IF(banks == 0); + aubInfo.aubWritable = static_cast(setBits(aubInfo.aubWritable, writable, banks)); +} + +bool GraphicsAllocation::isAubWritable(uint32_t banks) const { + return isAnyBitSet(aubInfo.aubWritable, banks); +} + +void GraphicsAllocation::setTbxWritable(bool writable, uint32_t banks) { + UNRECOVERABLE_IF(banks == 0); + aubInfo.tbxWritable = static_cast(setBits(aubInfo.tbxWritable, writable, banks)); +} + +bool GraphicsAllocation::isTbxWritable(uint32_t banks) const { + return isAnyBitSet(aubInfo.tbxWritable, banks); +} + constexpr uint32_t GraphicsAllocation::objectNotUsed; constexpr uint32_t GraphicsAllocation::objectNotResident; constexpr uint32_t GraphicsAllocation::objectAlwaysResident; diff --git a/shared/source/memory_manager/graphics_allocation.h b/shared/source/memory_manager/graphics_allocation.h index 66ee35d48a..3c4753912c 100644 --- a/shared/source/memory_manager/graphics_allocation.h +++ b/shared/source/memory_manager/graphics_allocation.h @@ -10,14 +10,13 @@ #include "shared/source/helpers/constants.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/ptr_math.h" +#include "shared/source/memory_manager/definitions/engine_limits.h" #include "shared/source/memory_manager/definitions/storage_info.h" #include "shared/source/memory_manager/host_ptr_defines.h" #include "shared/source/memory_manager/memory_pool.h" #include "shared/source/utilities/idlist.h" #include "shared/source/utilities/stackvec.h" -#include "engine_limits.h" - #include #include #include diff --git a/shared/source/memory_manager/graphics_allocation_extra.cpp b/shared/source/memory_manager/graphics_allocation_extra.cpp deleted file mode 100644 index 4797093afa..0000000000 --- a/shared/source/memory_manager/graphics_allocation_extra.cpp +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (C) 2019-2020 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/memory_manager/graphics_allocation.h" - -namespace NEO { - -void GraphicsAllocation::setAubWritable(bool writable, uint32_t banks) { aubInfo.aubWritable = writable; } -bool GraphicsAllocation::isAubWritable(uint32_t banks) const { return (aubInfo.aubWritable != 0); } -void GraphicsAllocation::setTbxWritable(bool writable, uint32_t banks) { aubInfo.tbxWritable = writable; } -bool GraphicsAllocation::isTbxWritable(uint32_t banks) const { return (aubInfo.tbxWritable != 0); } - -} // namespace NEO diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index 0cb661a282..8af172e8fd 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -496,4 +496,20 @@ void SVMAllocsManager::removeSvmMapOperation(const void *regionSvmPtr) { svmMapOperations.remove(regionSvmPtr); } +GraphicsAllocation::AllocationType SVMAllocsManager::getGraphicsAllocationType(const UnifiedMemoryProperties &unifiedMemoryProperties) const { + GraphicsAllocation::AllocationType allocationType = GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY; + if (unifiedMemoryProperties.memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) { + if (unifiedMemoryProperties.allocationFlags.allocFlags.allocWriteCombined) { + allocationType = GraphicsAllocation::AllocationType::WRITE_COMBINED; + } else { + if (DebugManager.flags.EnableStatelessCompression.get()) { + allocationType = GraphicsAllocation::AllocationType::BUFFER_COMPRESSED; + } else { + allocationType = GraphicsAllocation::AllocationType::BUFFER; + } + } + } + return allocationType; +} + } // namespace NEO diff --git a/shared/source/memory_manager/unified_memory_manager_extra.cpp b/shared/source/memory_manager/unified_memory_manager_extra.cpp deleted file mode 100644 index deb370798a..0000000000 --- a/shared/source/memory_manager/unified_memory_manager_extra.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (C) 2020 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/memory_manager/unified_memory_manager.h" - -namespace NEO { - -GraphicsAllocation::AllocationType SVMAllocsManager::getGraphicsAllocationType(const UnifiedMemoryProperties &unifiedMemoryProperties) const { - GraphicsAllocation::AllocationType allocationType = GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY; - if (unifiedMemoryProperties.memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) { - if (unifiedMemoryProperties.allocationFlags.allocFlags.allocWriteCombined) { - allocationType = GraphicsAllocation::AllocationType::WRITE_COMBINED; - } else { - allocationType = GraphicsAllocation::AllocationType::BUFFER; - } - } - return allocationType; -} - -} // namespace NEO diff --git a/shared/source/os_interface/linux/drm_buffer_object.h b/shared/source/os_interface/linux/drm_buffer_object.h index 146961c790..306c8df904 100644 --- a/shared/source/os_interface/linux/drm_buffer_object.h +++ b/shared/source/os_interface/linux/drm_buffer_object.h @@ -7,11 +7,11 @@ #pragma once +#include "shared/source/memory_manager/definitions/engine_limits.h" #include "shared/source/os_interface/linux/cache_info.h" #include "shared/source/utilities/stackvec.h" #include "drm/i915_drm.h" -#include "engine_limits.h" #include #include diff --git a/shared/source/os_interface/linux/drm_neo.h b/shared/source/os_interface/linux/drm_neo.h index 6f7cc109c7..a179355603 100644 --- a/shared/source/os_interface/linux/drm_neo.h +++ b/shared/source/os_interface/linux/drm_neo.h @@ -8,6 +8,7 @@ #pragma once #include "shared/source/gmm_helper/gmm_lib.h" #include "shared/source/helpers/basic_math.h" +#include "shared/source/memory_manager/definitions/engine_limits.h" #include "shared/source/os_interface/driver_info.h" #include "shared/source/os_interface/linux/cache_info.h" #include "shared/source/os_interface/linux/engine_info.h" @@ -18,7 +19,6 @@ #include "shared/source/utilities/stackvec.h" #include "drm/i915_drm.h" -#include "engine_limits.h" #include "engine_node.h" #include "igfxfmid.h" diff --git a/shared/test/unit_test/memory_manager/graphics_allocation_tests.cpp b/shared/test/unit_test/memory_manager/graphics_allocation_tests.cpp index 9c1d4f1ea9..8d0f529d60 100644 --- a/shared/test/unit_test/memory_manager/graphics_allocation_tests.cpp +++ b/shared/test/unit_test/memory_manager/graphics_allocation_tests.cpp @@ -7,7 +7,9 @@ #include "shared/test/common/mocks/mock_graphics_allocation.h" -#include "gtest/gtest.h" +#include "opencl/test/unit_test/mocks/mock_aub_csr.h" +#include "opencl/test/unit_test/mocks/mock_execution_environment.h" +#include "test.h" using namespace NEO; @@ -237,3 +239,164 @@ TEST(GraphicsAllocationTest, givenGraphicsAllocationWhenQueryingUsedPageSizeThen EXPECT_EQ(MemoryConstants::pageSize64k, graphicsAllocation.getUsedPageSize()); } } + +struct GraphicsAllocationTests : public ::testing::Test { + template + void initializeCsr() { + executionEnvironment.initializeMemoryManager(); + DeviceBitfield deviceBitfield(3); + auto csr = new MockAubCsr("", true, executionEnvironment, 0, deviceBitfield); + csr->multiOsContextCapable = true; + aubCsr.reset(csr); + } + + template + MockAubCsr &getAubCsr() { + return *(static_cast *>(aubCsr.get())); + } + + void gfxAllocationSetToDefault() { + graphicsAllocation.storageInfo.readOnlyMultiStorage = false; + graphicsAllocation.storageInfo.memoryBanks = 0; + graphicsAllocation.overrideMemoryPool(MemoryPool::MemoryNull); + } + + void gfxAllocationEnableReadOnlyMultiStorage(uint32_t banks) { + graphicsAllocation.storageInfo.cloningOfPageTables = false; + graphicsAllocation.storageInfo.readOnlyMultiStorage = true; + graphicsAllocation.storageInfo.memoryBanks = banks; + graphicsAllocation.overrideMemoryPool(MemoryPool::LocalMemory); + } + + MockExecutionEnvironment executionEnvironment; + std::unique_ptr aubCsr; + MockGraphicsAllocation graphicsAllocation; +}; + +HWTEST_F(GraphicsAllocationTests, givenGraphicsAllocationWhenIsAubWritableIsCalledThenTrueIsReturned) { + initializeCsr(); + auto &aubCsr = getAubCsr(); + + gfxAllocationSetToDefault(); + EXPECT_TRUE(aubCsr.isAubWritable(graphicsAllocation)); + + gfxAllocationEnableReadOnlyMultiStorage(0b1111); + EXPECT_TRUE(aubCsr.isAubWritable(graphicsAllocation)); +} + +HWTEST_F(GraphicsAllocationTests, givenGraphicsAllocationThatHasPageTablesCloningWhenWriteableFlagsAreUsedThenDefaultBankIsUsed) { + initializeCsr(); + auto &aubCsr = getAubCsr(); + + gfxAllocationSetToDefault(); + graphicsAllocation.storageInfo.memoryBanks = 0x2; + graphicsAllocation.overrideMemoryPool(MemoryPool::LocalMemory); + graphicsAllocation.storageInfo.cloningOfPageTables = true; + + EXPECT_TRUE(aubCsr.isAubWritable(graphicsAllocation)); + + //modify non default bank + graphicsAllocation.setAubWritable(false, 0x2); + + EXPECT_TRUE(aubCsr.isAubWritable(graphicsAllocation)); + + aubCsr.setAubWritable(false, graphicsAllocation); + + EXPECT_FALSE(aubCsr.isAubWritable(graphicsAllocation)); + + EXPECT_TRUE(aubCsr.isTbxWritable(graphicsAllocation)); + + graphicsAllocation.setTbxWritable(false, 0x2); + EXPECT_TRUE(aubCsr.isTbxWritable(graphicsAllocation)); + + aubCsr.setTbxWritable(false, graphicsAllocation); + + EXPECT_FALSE(aubCsr.isTbxWritable(graphicsAllocation)); +} + +HWTEST_F(GraphicsAllocationTests, givenGraphicsAllocationWhenAubWritableIsSetToFalseThenAubWritableIsFalse) { + initializeCsr(); + auto &aubCsr = getAubCsr(); + + gfxAllocationSetToDefault(); + aubCsr.setAubWritable(false, graphicsAllocation); + EXPECT_FALSE(aubCsr.isAubWritable(graphicsAllocation)); + + gfxAllocationEnableReadOnlyMultiStorage(0b1111); + aubCsr.setAubWritable(false, graphicsAllocation); + EXPECT_FALSE(aubCsr.isAubWritable(graphicsAllocation)); +} + +HWTEST_F(GraphicsAllocationTests, givenMultiStorageGraphicsAllocationWhenAubWritableIsSetOnSpecificBanksThenCorrectValuesAreSet) { + initializeCsr(); + auto &aubCsr = getAubCsr(); + gfxAllocationEnableReadOnlyMultiStorage(0b1010); + + aubCsr.setAubWritable(false, graphicsAllocation); + EXPECT_EQ(graphicsAllocation.aubInfo.aubWritable, maxNBitValue(32) & ~(0b1010)); + + EXPECT_FALSE(graphicsAllocation.isAubWritable(0b10)); + EXPECT_FALSE(graphicsAllocation.isAubWritable(0b1000)); + EXPECT_FALSE(graphicsAllocation.isAubWritable(0b1010)); + EXPECT_TRUE(graphicsAllocation.isAubWritable(0b1)); + EXPECT_TRUE(graphicsAllocation.isAubWritable(0b100)); + EXPECT_TRUE(graphicsAllocation.isAubWritable(0b101)); + + aubCsr.setAubWritable(true, graphicsAllocation); + EXPECT_EQ(graphicsAllocation.aubInfo.aubWritable, maxNBitValue(32)); + EXPECT_TRUE(graphicsAllocation.isAubWritable(0b1)); + EXPECT_TRUE(graphicsAllocation.isAubWritable(0b10)); + EXPECT_TRUE(graphicsAllocation.isAubWritable(0b100)); + EXPECT_TRUE(graphicsAllocation.isAubWritable(0b1000)); + EXPECT_TRUE(graphicsAllocation.isAubWritable(0b101)); + EXPECT_TRUE(graphicsAllocation.isAubWritable(0b1010)); +} + +HWTEST_F(GraphicsAllocationTests, givenGraphicsAllocationWhenIsTbxWritableIsCalledThenTrueIsReturned) { + initializeCsr(); + auto &aubCsr = getAubCsr(); + + gfxAllocationSetToDefault(); + EXPECT_TRUE(aubCsr.isTbxWritable(graphicsAllocation)); + + gfxAllocationEnableReadOnlyMultiStorage(0b1111); + EXPECT_TRUE(aubCsr.isTbxWritable(graphicsAllocation)); +}; + +HWTEST_F(GraphicsAllocationTests, givenGraphicsAllocationWhenTbxWritableIsSetToFalseThenTbxWritableIsFalse) { + initializeCsr(); + auto &aubCsr = getAubCsr(); + + gfxAllocationSetToDefault(); + aubCsr.setTbxWritable(false, graphicsAllocation); + EXPECT_FALSE(aubCsr.isTbxWritable(graphicsAllocation)); + + gfxAllocationEnableReadOnlyMultiStorage(0b1111); + aubCsr.setTbxWritable(false, graphicsAllocation); + EXPECT_FALSE(aubCsr.isTbxWritable(graphicsAllocation)); +} + +HWTEST_F(GraphicsAllocationTests, givenMultiStorageGraphicsAllocationWhenTbxWritableIsSetOnSpecificBanksThenCorrectValuesAreSet) { + initializeCsr(); + auto &aubCsr = getAubCsr(); + gfxAllocationEnableReadOnlyMultiStorage(0b1010); + + aubCsr.setTbxWritable(false, graphicsAllocation); + EXPECT_EQ(graphicsAllocation.aubInfo.tbxWritable, maxNBitValue(32) & ~(0b1010)); + + EXPECT_FALSE(graphicsAllocation.isTbxWritable(0b10)); + EXPECT_FALSE(graphicsAllocation.isTbxWritable(0b1000)); + EXPECT_FALSE(graphicsAllocation.isTbxWritable(0b1010)); + EXPECT_TRUE(graphicsAllocation.isTbxWritable(0b1)); + EXPECT_TRUE(graphicsAllocation.isTbxWritable(0b100)); + EXPECT_TRUE(graphicsAllocation.isTbxWritable(0b101)); + + aubCsr.setTbxWritable(true, graphicsAllocation); + EXPECT_EQ(graphicsAllocation.aubInfo.tbxWritable, maxNBitValue(32)); + EXPECT_TRUE(graphicsAllocation.isTbxWritable(0b1)); + EXPECT_TRUE(graphicsAllocation.isTbxWritable(0b10)); + EXPECT_TRUE(graphicsAllocation.isTbxWritable(0b100)); + EXPECT_TRUE(graphicsAllocation.isTbxWritable(0b1000)); + EXPECT_TRUE(graphicsAllocation.isTbxWritable(0b101)); + EXPECT_TRUE(graphicsAllocation.isTbxWritable(0b1010)); +} \ No newline at end of file