From cf3817e05884e25f8b0c3d5c73d1b7a8e5bfc375 Mon Sep 17 00:00:00 2001 From: Mateusz Jablonski Date: Wed, 15 Jun 2022 01:12:33 +0000 Subject: [PATCH] Add debug flag for EOT WA EOT WA requires allocating last 64KB of kernel heap and putting EOT signature at the last 16 bytes of kernel heap Related-To: NEO-7099 Signed-off-by: Mateusz Jablonski --- ...and_stream_receiver_flush_task_2_tests.cpp | 19 +++++ .../command_stream/command_stream_receiver.h | 4 + .../command_stream_receiver_hw_base.inl | 7 ++ .../debug_settings/debug_variables_base.inl | 1 + shared/source/device/device.cpp | 20 +++++ shared/source/device/device.h | 1 + shared/source/device/root_device.cpp | 5 +- .../source/memory_manager/gfx_partition.cpp | 8 +- .../source/memory_manager/memory_manager.cpp | 5 +- .../os_interface/linux/drm_memory_manager.cpp | 19 ++++- .../windows/wddm_memory_manager.cpp | 11 ++- .../libult/ult_command_stream_receiver.h | 1 + shared/test/common/mock_gdi/mock_gdi.cpp | 4 +- shared/test/common/mocks/mock_device.h | 1 + shared/test/common/test_files/igdrcl.config | 1 + .../command_stream_receiver_tests.cpp | 14 +++- .../unit_test/device/neo_device_tests.cpp | 81 +++++++++++++++++++ .../memory_manager/gfx_partition_tests.cpp | 19 +++++ .../linux/drm_memory_manager_tests.cpp | 14 ++++ .../os_interface/windows/CMakeLists.txt | 1 + ...ddm_memory_manager_with_localmem_tests.cpp | 47 +++++++++++ 21 files changed, 271 insertions(+), 12 deletions(-) create mode 100644 shared/test/unit_test/os_interface/windows/wddm_memory_manager_with_localmem_tests.cpp diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index d8453eece1..e26e1e63f3 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -500,6 +500,25 @@ 
HWTEST_F(CommandStreamReceiverFlushTaskTests, WhenFlushingThenScratchAllocationI EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation)); } +HWTEST_F(CommandStreamReceiverFlushTaskTests, givenAdditionalAllocationForResidencyWhenFlushingThenHandleResidency) { + auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + pDevice->resetCommandStreamReceiver(commandStreamReceiver); + + MockGraphicsAllocation allocation{}; + commandStreamReceiver->addAdditionalAllocationForResidency(&allocation); + + flushTask(*commandStreamReceiver); + + auto tagAllocation = commandStreamReceiver->getTagAllocation(); + ASSERT_NE(tagAllocation, nullptr); + + EXPECT_TRUE(commandStreamReceiver->isMadeResident(tagAllocation)); + EXPECT_TRUE(commandStreamReceiver->isMadeResident(&allocation)); + + EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(tagAllocation)); + EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(&allocation)); +} + HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandStreamReceiverWhenFenceAllocationIsRequiredAndFlushTaskIsCalledThenFenceAllocationIsMadeResident) { RAIIHwHelperFactory> hwHelperBackup{pDevice->getHardwareInfo().platform.eRenderCoreFamily}; diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 164a33f7ce..5381286ae5 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -337,6 +337,9 @@ class CommandStreamReceiver { DispatchMode getDispatchMode() const { return this->dispatchMode; } + void addAdditionalAllocationForResidency(GraphicsAllocation *graphicsAllocation) { + this->additionalAllocationsForResidency.push_back(graphicsAllocation); + } LogicalStateHelper *getLogicalStateHelper() const; @@ -389,6 +392,7 @@ class CommandStreamReceiver { GraphicsAllocation *perDssBackedBuffer = nullptr; 
GraphicsAllocation *clearColorAllocation = nullptr; GraphicsAllocation *workPartitionAllocation = nullptr; + StackVec additionalAllocationsForResidency; MultiGraphicsAllocation *tagsMultiAllocation = nullptr; diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index f35ebdd398..8e9836412d 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -515,6 +515,10 @@ CompletionStamp CommandStreamReceiverHw::flushTask( this->makeResident(*tagAllocation); + for (auto &additionalAllocationForResidency : additionalAllocationsForResidency) { + this->makeResident(*additionalAllocationForResidency); + } + if (globalFenceAllocation) { makeResident(*globalFenceAllocation); } @@ -772,6 +776,9 @@ inline bool CommandStreamReceiverHw::flushBatchedSubmissions() { for (auto &surface : resourcePackage) { surfacesForSubmit.push_back(surface); } + for (auto &additionalAllocationForResidency : additionalAllocationsForResidency) { + surfacesForSubmit.push_back(additionalAllocationForResidency); + } // make sure we flush DC if needed if (epiloguePipeControlLocation && MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo)) { diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index ac15be58a1..76ffeb70b7 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -74,6 +74,7 @@ DECLARE_DEBUG_VARIABLE(bool, PrintMemoryRegionSizes, false, "print memory bank t DECLARE_DEBUG_VARIABLE(bool, UpdateCrossThreadDataSize, false, "Turn on cross thread data size calculation for PATCH TOKEN binary") DECLARE_DEBUG_VARIABLE(bool, UseNewQueryTopoIoctl, true, "Use DRM_I915_QUERY_COMPUTE_SLICES") DECLARE_DEBUG_VARIABLE(bool, DisableGpuHangDetection, false, "Disable GPU hang 
detection") +DECLARE_DEBUG_VARIABLE(bool, EnableEotWa, false, "Enable WA to program EOT instruction at the end of kernel heap") DECLARE_DEBUG_VARIABLE(std::string, ForceDeviceId, std::string("unk"), "DeviceId selected for testing") DECLARE_DEBUG_VARIABLE(std::string, FilterDeviceId, std::string("unk"), "Device id filter, adapter matching device id will be opened. Ignored when unk.") DECLARE_DEBUG_VARIABLE(std::string, FilterBdfPath, std::string("unk"), "Linux-only, BDF path filter, only matching paths will be opened. Ignored when unk.") diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index 5fe1602c66..39f9d774ee 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -59,6 +59,9 @@ Device::~Device() { syncBufferHandler.reset(); commandStreamReceivers.clear(); + if (kernelEotWaAllocation) { + executionEnvironment->memoryManager->freeGraphicsMemory(kernelEotWaAllocation); + } executionEnvironment->memoryManager->waitForDeletions(); executionEnvironment->decRefInternal(); @@ -215,10 +218,22 @@ bool Device::createDeviceImpl() { if (getDebugger() && hwHelper.disableL3CacheForDebug(hwInfo)) { getGmmHelper()->forceAllResourcesUncached(); } + if (DebugManager.flags.EnableEotWa.get()) { + AllocationProperties allocationProperties{rootDeviceIndex, MemoryConstants::pageSize64k, AllocationType::KERNEL_ISA, deviceBitfield}; + auto memoryManager = executionEnvironment->memoryManager.get(); + auto heapBase = memoryManager->getInternalHeapBaseAddress(rootDeviceIndex, memoryManager->isLocalMemoryUsedForIsa(rootDeviceIndex)); + allocationProperties.gpuAddress = heapBase + 4 * MemoryConstants::gigaByte - MemoryConstants::pageSize64k; + kernelEotWaAllocation = executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties); + } if (!createEngines()) { return false; } + if (kernelEotWaAllocation) { + auto memoryManager = executionEnvironment->memoryManager.get(); + uint8_t 
eotMemoryPattern[]{0x31, 0x09, 0x0C, 0x80, 0x04, 0x00, 0x00, 0x00, 0x0C, 0x7F, 0x20, 0x30, 0x00, 0x00, 0x00, 0x00}; + memoryManager->copyMemoryToAllocation(kernelEotWaAllocation, MemoryConstants::pageSize64k - sizeof(eotMemoryPattern) - MemoryConstants::pageSize, eotMemoryPattern, sizeof(eotMemoryPattern)); + } getDefaultEngine().osContext->setDefaultContext(true); @@ -373,6 +388,11 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa addEngineToEngineGroup(engine); } + if (kernelEotWaAllocation) { + if (!EngineHelpers::isBcs(engineType)) { + commandStreamReceiver->addAdditionalAllocationForResidency(kernelEotWaAllocation); + } + } commandStreamReceivers.push_back(std::move(commandStreamReceiver)); return true; diff --git a/shared/source/device/device.h b/shared/source/device/device.h index 76ff5f6c88..c78640fe2c 100644 --- a/shared/source/device/device.h +++ b/shared/source/device/device.h @@ -202,6 +202,7 @@ class Device : public ReferenceTrackedObject { uintptr_t specializedDevice = reinterpret_cast(nullptr); + GraphicsAllocation *kernelEotWaAllocation = nullptr; GraphicsAllocation *rtMemoryBackedBuffer = nullptr; std::vector rtDispatchGlobals; struct { diff --git a/shared/source/device/root_device.cpp b/shared/source/device/root_device.cpp index cc66f69a47..647e2eb337 100644 --- a/shared/source/device/root_device.cpp +++ b/shared/source/device/root_device.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -67,6 +67,9 @@ void RootDevice::initializeRootCommandStreamReceiver() { rootCommandStreamReceiver->initializeTagAllocation(); rootCommandStreamReceiver->createGlobalFenceAllocation(); rootCommandStreamReceiver->createWorkPartitionAllocation(*this); + if (kernelEotWaAllocation) { + rootCommandStreamReceiver->addAdditionalAllocationForResidency(kernelEotWaAllocation); + } 
commandStreamReceivers.push_back(std::move(rootCommandStreamReceiver)); EngineControl engine{commandStreamReceivers.back().get(), osContext}; diff --git a/shared/source/memory_manager/gfx_partition.cpp b/shared/source/memory_manager/gfx_partition.cpp index 324b6702ab..1145095460 100644 --- a/shared/source/memory_manager/gfx_partition.cpp +++ b/shared/source/memory_manager/gfx_partition.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -237,7 +237,11 @@ bool GfxPartition::init(uint64_t gpuAddressSpace, size_t cpuAddressRangeSizeToRe heapInitExternalWithFrontWindow(HeapAssigner::mapExternalWindowIndex(heap), heapAllocate(heap, externalFrontWindowSize), externalFrontWindowSize); } else if (HeapAssigner::isInternalHeap(heap)) { - heapInitWithFrontWindow(heap, gfxBase, gfxHeap32Size, GfxPartition::internalFrontWindowPoolSize); + auto heapSize = gfxHeap32Size; + if (DebugManager.flags.EnableEotWa.get()) { + heapSize = 4 * MemoryConstants::gigaByte - MemoryConstants::pageSize64k; + } + heapInitWithFrontWindow(heap, gfxBase, heapSize, GfxPartition::internalFrontWindowPoolSize); heapInitFrontWindow(HeapAssigner::mapInternalWindowIndex(heap), gfxBase, GfxPartition::internalFrontWindowPoolSize); } else { heapInit(heap, gfxBase, gfxHeap32Size); diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index 86baf1a228..0e9b15cbae 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -434,8 +434,9 @@ bool MemoryManager::getAllocationData(AllocationData &allocationData, const Allo } allocationData.hostPtr = hostPtr; - if (properties.allocationType == AllocationType::KERNEL_ISA || - properties.allocationType == AllocationType::KERNEL_ISA_INTERNAL) { + if ((properties.allocationType == AllocationType::KERNEL_ISA || + properties.allocationType == 
AllocationType::KERNEL_ISA_INTERNAL) && + properties.gpuAddress == 0) { allocationData.size = properties.size + hwHelper.getPaddingForISAAllocation(); } else { allocationData.size = properties.size; diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index abeafc9bd5..a5e5b1c557 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -617,8 +617,13 @@ DrmAllocation *DrmMemoryManager::allocate32BitGraphicsMemoryImpl(const Allocatio size_t alignedAllocationSize = alignUp(allocationData.size, MemoryConstants::pageSize); auto allocationSize = alignedAllocationSize; auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex); - auto gpuVA = gfxPartition->heapAllocate(allocatorToUse, allocationSize); + uint64_t gpuVA; + if (GraphicsAllocation::isIsaAllocationType(allocationData.type) && allocationData.gpuAddress != 0) { + gpuVA = allocationData.gpuAddress; + } else { + gpuVA = gfxPartition->heapAllocate(allocatorToUse, allocationSize); + } if (!gpuVA) { return nullptr; } @@ -1403,9 +1408,15 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const A auto sizeAllocated = sizeAligned; auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex); - auto gpuAddress = getGpuAddress(this->alignmentSelector, this->heapAssigner, *hwInfo, - allocationData.type, gfxPartition, sizeAllocated, - allocationData.hostPtr, allocationData.flags.resource48Bit, allocationData.flags.use32BitFrontWindow, *gmmHelper); + uint64_t gpuAddress; + + if (GraphicsAllocation::isIsaAllocationType(allocationData.type) && allocationData.gpuAddress != 0) { + gpuAddress = allocationData.gpuAddress; + } else { + gpuAddress = getGpuAddress(this->alignmentSelector, this->heapAssigner, *hwInfo, + allocationData.type, gfxPartition, sizeAllocated, + allocationData.hostPtr, allocationData.flags.resource48Bit, 
allocationData.flags.use32BitFrontWindow, *gmmHelper); + } auto canonizedGpuAddress = gmmHelper->canonize(gpuAddress); auto allocation = std::make_unique(allocationData.rootDeviceIndex, numHandles, allocationData.type, nullptr, nullptr, canonizedGpuAddress, sizeAligned, MemoryPool::LocalMemory); DrmAllocation *drmAllocation = static_cast(allocation.get()); diff --git a/shared/source/os_interface/windows/wddm_memory_manager.cpp b/shared/source/os_interface/windows/wddm_memory_manager.cpp index 827d785950..2365dfda70 100644 --- a/shared/source/os_interface/windows/wddm_memory_manager.cpp +++ b/shared/source/os_interface/windows/wddm_memory_manager.cpp @@ -403,7 +403,12 @@ GraphicsAllocation *WddmMemoryManager::allocate32BitGraphicsMemoryImpl(const All CacheSettingsHelper::getGmmUsageType(wddmAllocation->getAllocationType(), !!allocationData.flags.uncacheable, *hwInfo), false, {}, true); wddmAllocation->setDefaultGmm(gmm); - if (!createWddmAllocation(wddmAllocation.get(), nullptr)) { + void *requiredGpuAddress = nullptr; + if (GraphicsAllocation::isIsaAllocationType(allocationData.type) && allocationData.gpuAddress != 0) { + requiredGpuAddress = reinterpret_cast(allocationData.gpuAddress); + } + + if (!createWddmAllocation(wddmAllocation.get(), requiredGpuAddress)) { delete gmm; freeSystemMemory(pSysMem); return nullptr; @@ -1095,6 +1100,10 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryInDevicePool(const } } + if (GraphicsAllocation::isIsaAllocationType(allocationData.type) && allocationData.gpuAddress != 0) { + requiredGpuVa = reinterpret_cast(allocationData.gpuAddress); + } + if (!createWddmAllocation(wddmAllocation.get(), requiredGpuVa)) { for (auto handleId = 0u; handleId < allocationData.storageInfo.getNumBanks(); handleId++) { delete wddmAllocation->getGmm(handleId); diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index 4b0338a24c..bc35891986 100644 --- 
a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -65,6 +65,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::wasSubmittedToSingleSubdevice; using BaseClass::CommandStreamReceiver::activePartitions; using BaseClass::CommandStreamReceiver::activePartitionsConfig; + using BaseClass::CommandStreamReceiver::additionalAllocationsForResidency; using BaseClass::CommandStreamReceiver::baseWaitFunction; using BaseClass::CommandStreamReceiver::bindingTableBaseAddressRequired; using BaseClass::CommandStreamReceiver::canUse4GbHeaps; diff --git a/shared/test/common/mock_gdi/mock_gdi.cpp b/shared/test/common/mock_gdi/mock_gdi.cpp index f0594c9600..6403b28c74 100644 --- a/shared/test/common/mock_gdi/mock_gdi.cpp +++ b/shared/test/common/mock_gdi/mock_gdi.cpp @@ -388,11 +388,13 @@ NTSTATUS __stdcall D3DKMTQueryResourceInfoFromNtHandle(IN OUT D3DKMT_QUERYRESOUR return STATUS_SUCCESS; } +uint8_t lockedData[0x20000]{}; + NTSTATUS __stdcall D3DKMTLock2(IN OUT D3DKMT_LOCK2 *lock2) { if (lock2->hAllocation == 0 || lock2->hDevice == 0) { return STATUS_INVALID_PARAMETER; } - lock2->pData = (void *)65536; + /* round down to a 64 KB boundary that lies inside lockedData */ lock2->pData = reinterpret_cast<void *>((reinterpret_cast<uintptr_t>(lockedData) + 0x10000) & ~static_cast<uintptr_t>(0xFFFF)); return STATUS_SUCCESS; } diff --git a/shared/test/common/mocks/mock_device.h b/shared/test/common/mocks/mock_device.h index 7aaf7fd1d4..224159d465 100644 --- a/shared/test/common/mocks/mock_device.h +++ b/shared/test/common/mocks/mock_device.h @@ -61,6 +61,7 @@ class MockDevice : public RootDevice { using Device::getGlobalMemorySize; using Device::initializeCaps; using Device::isDebuggerActive; + using Device::kernelEotWaAllocation; using Device::regularEngineGroups; using Device::rootCsrCreated; using Device::rtMemoryBackedBuffer; diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index fb4c1291fd..e184f063b0 100644 ---
a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -271,6 +271,7 @@ CFEMaximumNumberOfThreads = -1 CFEOverDispatchControl = -1 CFELargeGRFThreadAdjustDisable = -1 SynchronizeWalkerInWparidMode = -1 +EnableEotWa = 0 EnableWalkerPartition = -1 OverrideNumComputeUnitsForScratch = -1 ForceThreadGroupDispatchSize = -1 diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index a38d0fc16a..8baea57490 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -98,6 +98,18 @@ HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenTimestampTypeIs32b) { EXPECT_EQ(expectedOffset, tag->getGlobalStartOffset()); } +HWTEST_F(CommandStreamReceiverTest, whenAddingAdditionalAllocationForResidencyThenItIsRegisteredInCsr) { + auto &csr = pDevice->getUltCommandStreamReceiver(); + + EXPECT_TRUE(csr.additionalAllocationsForResidency.empty()); + + MockGraphicsAllocation allocation{}; + csr.addAdditionalAllocationForResidency(&allocation); + + EXPECT_EQ(1u, csr.additionalAllocationsForResidency.size()); + EXPECT_EQ(&allocation, csr.additionalAllocationsForResidency[0]); +} + HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenFlagsAreSetCorrectly) { auto &csr = pDevice->getUltCommandStreamReceiver(); csr.initProgrammingFlags(); @@ -2074,4 +2086,4 @@ HWTEST_F(CommandStreamReceiverTest, givenMultipleActivePartitionsWhenWaitLogIsEn << " " << tagValue << std::endl; EXPECT_STREQ(expectedOutput.str().c_str(), output.c_str()); -} +} \ No newline at end of file diff --git a/shared/test/unit_test/device/neo_device_tests.cpp b/shared/test/unit_test/device/neo_device_tests.cpp index 31c2869a59..5b91b5a255 100644 --- a/shared/test/unit_test/device/neo_device_tests.cpp +++ b/shared/test/unit_test/device/neo_device_tests.cpp @@ -6,6 +6,7 @@ */ 
#include "shared/source/device/device.h" +#include "shared/source/os_interface/device_factory.h" #include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" @@ -13,8 +14,10 @@ #include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_compiler_interface.h" #include "shared/test/common/mocks/mock_compilers.h" +#include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/ult_device_factory.h" +#include "shared/test/common/test_macros/hw_test.h" #include "shared/test/common/test_macros/test.h" using namespace NEO; @@ -46,6 +49,84 @@ TEST(Device, givenNoDebuggerWhenGettingDebuggerThenNullptrIsReturned) { EXPECT_EQ(nullptr, device->getSourceLevelDebugger()); } +using DeviceKernelWaTest = ::testing::Test; + +HWTEST_F(DeviceKernelWaTest, givenEnabledEotWaWhenCreatingDeviceThenKernelWaIsCreatedAndAddedToGpgpuCommandStreamReceiver) { + DebugManagerStateRestore restorer; + DebugManager.flags.EnableEotWa.set(true); + + { + UltDeviceFactory factory{1, 0}; + + auto device = factory.rootDevices[0]; + + EXPECT_NE(nullptr, device->kernelEotWaAllocation); + + for (auto &engine : device->allEngines) { + auto csr = static_cast *>(engine.commandStreamReceiver); + if (EngineHelpers::isBcs(engine.getEngineType())) { + EXPECT_TRUE(csr->additionalAllocationsForResidency.empty()); + } else { + EXPECT_EQ(1u, csr->additionalAllocationsForResidency.size()); + EXPECT_EQ(device->kernelEotWaAllocation, csr->additionalAllocationsForResidency[0]); + } + } + } + { + UltDeviceFactory factory{1, 2}; + + auto device = factory.rootDevices[0]; + + EXPECT_NE(nullptr, device->kernelEotWaAllocation); + + for (auto &engine : device->allEngines) { + auto csr = static_cast *>(engine.commandStreamReceiver); + if (EngineHelpers::isBcs(engine.getEngineType())) { + 
EXPECT_TRUE(csr->additionalAllocationsForResidency.empty()); + } else { + EXPECT_EQ(1u, csr->additionalAllocationsForResidency.size()); + EXPECT_EQ(device->kernelEotWaAllocation, csr->additionalAllocationsForResidency[0]); + } + } + } +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceKernelWaTest, givenEnabledEotWaWhenCreatingDeviceThenKernelWaIsCreatedWithProperContentAndGpuAddress) { + if (is32bit) { + GTEST_SKIP(); + } + DebugManagerStateRestore restorer; + DebugManager.flags.EnableEotWa.set(true); + DebugManager.flags.EnableLocalMemory.set(false); + + VariableBackup createRootDeviceFuncBackup{&DeviceFactory::createRootDeviceFunc}; + createRootDeviceFuncBackup = [](ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) -> std::unique_ptr { + return std::unique_ptr(MockDevice::create(&executionEnvironment, rootDeviceIndex)); + }; + VariableBackup backup(&ultHwConfig); + ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; + ultHwConfig.forceOsAgnosticMemoryManager = false; + + auto executionEnvironment = new MockExecutionEnvironment(defaultHwInfo.get(), true, 1); + auto devices = DeviceFactory::createDevices(*executionEnvironment); + auto memoryManager = executionEnvironment->memoryManager.get(); + + auto device = static_cast(devices[0].get()); + + EXPECT_NE(nullptr, device->kernelEotWaAllocation); + + auto heapBase = memoryManager->getGfxPartition(device->getRootDeviceIndex())->getHeapBase(HeapIndex::HEAP_INTERNAL); + auto expectedGpuAddress = device->getGmmHelper()->canonize(heapBase + MemoryConstants::gigaByte * 4 - MemoryConstants::pageSize64k); + + EXPECT_EQ(device->kernelEotWaAllocation->getGpuAddress(), expectedGpuAddress); + EXPECT_EQ(device->kernelEotWaAllocation->getUnderlyingBufferSize(), MemoryConstants::pageSize64k); + + auto cpuPtr = device->kernelEotWaAllocation->getUnderlyingBuffer(); + /* all 16 bytes, the same pattern Device::createDeviceImpl writes */ uint8_t eotMemoryPattern[]{0x31, 0x09, 0x0C, 0x80, 0x04, 0x00, 0x00, 0x00, 0x0C, 0x7F, 0x20, 0x30, 0x00, 0x00, 0x00, 0x00}; + + EXPECT_EQ(0, 
memcmp(ptrOffset(cpuPtr, MemoryConstants::pageSize64k - MemoryConstants::pageSize - sizeof(eotMemoryPattern)), eotMemoryPattern, sizeof(eotMemoryPattern))); +} + using DeviceTest = Test; TEST_F(DeviceTest, whenInitializeRayTracingIsCalledAndRtBackedBufferIsNullptrThenMemoryBackedBufferIsCreated) { diff --git a/shared/test/unit_test/memory_manager/gfx_partition_tests.cpp b/shared/test/unit_test/memory_manager/gfx_partition_tests.cpp index 8ccbf387d5..e39c4973c3 100644 --- a/shared/test/unit_test/memory_manager/gfx_partition_tests.cpp +++ b/shared/test/unit_test/memory_manager/gfx_partition_tests.cpp @@ -10,6 +10,7 @@ #include "shared/source/helpers/ptr_math.h" #include "shared/source/os_interface/os_memory.h" #include "shared/source/utilities/cpu_info.h" +#include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_gfx_partition.h" #include "gtest/gtest.h" @@ -165,6 +166,24 @@ TEST(GfxPartitionTest, GivenFullRange48BitSvmWhenTestingGfxPartitionThenAllExpec testGfxPartition(gfxPartition, gfxBase, gfxTop, gfxBase); } +TEST(GfxPartitionTest, GivenEnabledEotWaWhenInitializingHeapsThenInternalHeapsHave4GBMinusOnePageRange) { + DebugManagerStateRestore restorer; + DebugManager.flags.EnableEotWa.set(true); + for (auto &addressRange : {48, 57}) { + MockGfxPartition gfxPartition; + gfxPartition.init(maxNBitValue(addressRange), reservedCpuAddressRangeSize, 0, 1); + + auto expectedSize = 4 * MemoryConstants::gigaByte - MemoryConstants::pageSize64k; + EXPECT_EQ(gfxPartition.getHeapSize(HeapIndex::HEAP_INTERNAL), expectedSize); + EXPECT_EQ(gfxPartition.getHeapSize(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY), expectedSize); + + auto fullSize = 4 * MemoryConstants::gigaByte; + + EXPECT_EQ(gfxPartition.getHeapSize(HeapIndex::HEAP_EXTERNAL), fullSize); + EXPECT_EQ(gfxPartition.getHeapSize(HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY), fullSize); + } +} + TEST(GfxPartitionTest, 
GivenFullRange47BitSvmWhenTestingGfxPartitionThenAllExpectationsAreMet) { MockGfxPartition gfxPartition; gfxPartition.init(maxNBitValue(47), reservedCpuAddressRangeSize, 0, 1); diff --git a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index 8efedec66d..7030077a33 100644 --- a/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -1512,6 +1512,20 @@ TEST_F(DrmMemoryManagerWithLocalMemoryTest, givenDrmMemoryManagerWithLocalMemory memoryManager->freeGraphicsMemory(graphicsAllocation); } +TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, whenAllocatingKernelIsaWithSpecificGpuAddressThenThisAddressIsUsed) { + uint64_t expectedGpuAddress = 0xDEADBEEFu; + size_t size = 4096u; + AllocationProperties properties(rootDeviceIndex, true, size, AllocationType::KERNEL_ISA, false, device->getDeviceBitfield()); + properties.gpuAddress = expectedGpuAddress; + DebugManager.flags.EnableLocalMemory.set(true); + auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); + ASSERT_NE(nullptr, graphicsAllocation); + + EXPECT_EQ(expectedGpuAddress, graphicsAllocation->getGpuAddress()); + EXPECT_EQ(MemoryPool::LocalMemory, graphicsAllocation->getMemoryPool()); + memoryManager->freeGraphicsMemory(graphicsAllocation); +} + TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerAndOsHandleWhenCreateIsCalledAndRootDeviceIndexIsSpecifiedThenGraphicsAllocationIsReturnedWithCorrectRootDeviceIndex) { mock->ioctl_expected.primeFdToHandle = 1; mock->ioctl_expected.gemWait = 1; diff --git a/shared/test/unit_test/os_interface/windows/CMakeLists.txt b/shared/test/unit_test/os_interface/windows/CMakeLists.txt index f05a0afad9..55e27ff92e 100644 --- a/shared/test/unit_test/os_interface/windows/CMakeLists.txt +++ b/shared/test/unit_test/os_interface/windows/CMakeLists.txt @@ 
-23,6 +23,7 @@ set(NEO_CORE_OS_INTERFACE_TESTS_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/wddm_address_space_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_command_stream_l0_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_mapper_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager_with_localmem_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_preemption_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_shared_allocations_test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/wddm_special_heap_test.cpp diff --git a/shared/test/unit_test/os_interface/windows/wddm_memory_manager_with_localmem_tests.cpp b/shared/test/unit_test/os_interface/windows/wddm_memory_manager_with_localmem_tests.cpp new file mode 100644 index 0000000000..1a7b678050 --- /dev/null +++ b/shared/test/unit_test/os_interface/windows/wddm_memory_manager_with_localmem_tests.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/memory_manager/memory_manager.h" +#include "shared/source/os_interface/device_factory.h" +#include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/helpers/default_hw_info.h" +#include "shared/test/common/helpers/ult_hw_config.h" +#include "shared/test/common/helpers/variable_backup.h" +#include "shared/test/common/mocks/mock_execution_environment.h" +#include "shared/test/common/test_macros/test.h" + +using namespace NEO; + +TEST(WddmMemoryManagerWithLocalMemoryTest, whenAllocatingKernelIsaWithSpecificGpuAddressThenThisAddressIsUsed) { + if (is32bit) { + GTEST_SKIP(); + } + VariableBackup backup(&ultHwConfig); + ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false; + ultHwConfig.forceOsAgnosticMemoryManager = false; + + DebugManagerStateRestore restorer; + DebugManager.flags.EnableLocalMemory.set(true); + + auto executionEnvironment = new MockExecutionEnvironment(defaultHwInfo.get(), true, 1); + auto devices = DeviceFactory::createDevices(*executionEnvironment); + auto memoryManager = 
executionEnvironment->memoryManager.get(); + auto &device = devices.front(); + + uint64_t expectedGpuAddress = memoryManager->getInternalHeapBaseAddress(device->getRootDeviceIndex(), true) + MemoryConstants::gigaByte; + size_t size = 4096u; + + AllocationProperties properties(device->getRootDeviceIndex(), true, size, AllocationType::KERNEL_ISA, false, device->getDeviceBitfield()); + properties.gpuAddress = expectedGpuAddress; + + auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties); + ASSERT_NE(nullptr, graphicsAllocation); + + EXPECT_EQ(device->getGmmHelper()->canonize(expectedGpuAddress), graphicsAllocation->getGpuAddress()); + EXPECT_EQ(MemoryPool::LocalMemory, graphicsAllocation->getMemoryPool()); + memoryManager->freeGraphicsMemory(graphicsAllocation); +}