Add debug flag for EOT WA

The EOT workaround (WA) requires allocating the last 64KB of the kernel heap and
placing the EOT signature in the last 16 bytes of that heap.

Related-To: NEO-7099
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2022-06-15 01:12:33 +00:00
committed by Compute-Runtime-Automation
parent 9a667308b9
commit cf3817e058
21 changed files with 271 additions and 12 deletions

View File

@@ -65,6 +65,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::wasSubmittedToSingleSubdevice;
using BaseClass::CommandStreamReceiver::activePartitions;
using BaseClass::CommandStreamReceiver::activePartitionsConfig;
using BaseClass::CommandStreamReceiver::additionalAllocationsForResidency;
using BaseClass::CommandStreamReceiver::baseWaitFunction;
using BaseClass::CommandStreamReceiver::bindingTableBaseAddressRequired;
using BaseClass::CommandStreamReceiver::canUse4GbHeaps;

View File

@@ -388,11 +388,13 @@ NTSTATUS __stdcall D3DKMTQueryResourceInfoFromNtHandle(IN OUT D3DKMT_QUERYRESOUR
return STATUS_SUCCESS;
}
// Backing storage handed out by the mocked D3DKMTLock2 below. It is twice the
// 64KB alignment, so a 64KB-aligned pointer with a full 64KB behind it always
// fits inside the array regardless of where the array itself is placed.
uint8_t lockedData[0x20000]{};

// Mocked lock call: rejects a zero allocation or device handle, otherwise
// returns a 64KB-aligned CPU pointer into lockedData through lock2->pData.
NTSTATUS __stdcall D3DKMTLock2(IN OUT D3DKMT_LOCK2 *lock2) {
    if (lock2->hAllocation == 0 || lock2->hDevice == 0) {
        return STATUS_INVALID_PARAMETER;
    }
    constexpr uintptr_t alignment = 0x10000;
    const auto bufferStart = reinterpret_cast<uintptr_t>(lockedData);
    // Move one alignment unit forward, then clear the low 16 bits. The mask has to be
    // ~(alignment - 1); a mask of -0xFFFF would keep bit 0 and could yield an odd address.
    lock2->pData = reinterpret_cast<void *>((bufferStart + alignment) & ~(alignment - 1));
    return STATUS_SUCCESS;
}

View File

@@ -61,6 +61,7 @@ class MockDevice : public RootDevice {
using Device::getGlobalMemorySize;
using Device::initializeCaps;
using Device::isDebuggerActive;
using Device::kernelEotWaAllocation;
using Device::regularEngineGroups;
using Device::rootCsrCreated;
using Device::rtMemoryBackedBuffer;

View File

@@ -271,6 +271,7 @@ CFEMaximumNumberOfThreads = -1
CFEOverDispatchControl = -1
CFELargeGRFThreadAdjustDisable = -1
SynchronizeWalkerInWparidMode = -1
EnableEotWa = 0
EnableWalkerPartition = -1
OverrideNumComputeUnitsForScratch = -1
ForceThreadGroupDispatchSize = -1

View File

@@ -98,6 +98,18 @@ HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenTimestampTypeIs32b) {
EXPECT_EQ(expectedOffset, tag->getGlobalStartOffset());
}
// Checks that an allocation passed to addAdditionalAllocationForResidency() ends up
// as the only entry of the CSR's additionalAllocationsForResidency container.
HWTEST_F(CommandStreamReceiverTest, whenAddingAdditionalAllocationForResidencyThenItIsRegisteredInCsr) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_TRUE(csr.additionalAllocationsForResidency.empty());

    MockGraphicsAllocation allocation{};
    csr.addAdditionalAllocationForResidency(&allocation);

    // Fatal on purpose: the element access below is only valid when an entry was registered.
    ASSERT_EQ(1u, csr.additionalAllocationsForResidency.size());
    EXPECT_EQ(&allocation, csr.additionalAllocationsForResidency[0]);
}
HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenFlagsAreSetCorrectly) {
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.initProgrammingFlags();
@@ -2074,4 +2086,4 @@ HWTEST_F(CommandStreamReceiverTest, givenMultipleActivePartitionsWhenWaitLogIsEn
<< " " << tagValue << std::endl;
EXPECT_STREQ(expectedOutput.str().c_str(), output.c_str());
}
}

View File

@@ -6,6 +6,7 @@
*/
#include "shared/source/device/device.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/test/common/fixtures/device_fixture.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/default_hw_info.h"
@@ -13,8 +14,10 @@
#include "shared/test/common/mocks/mock_builtins.h"
#include "shared/test/common/mocks/mock_compiler_interface.h"
#include "shared/test/common/mocks/mock_compilers.h"
#include "shared/test/common/mocks/mock_csr.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/test_macros/test.h"
using namespace NEO;
@@ -46,6 +49,84 @@ TEST(Device, givenNoDebuggerWhenGettingDebuggerThenNullptrIsReturned) {
EXPECT_EQ(nullptr, device->getSourceLevelDebugger());
}
using DeviceKernelWaTest = ::testing::Test;

// With EnableEotWa set, a root device created without sub-devices and one created with
// two sub-devices must each hold a kernel EOT WA allocation. That allocation must be the
// single additional residency allocation of every non-BCS engine's CSR, while BCS
// engines must have none.
HWTEST_F(DeviceKernelWaTest, givenEnabledEotWaWhenCreatingDeviceThenKernelWaIsCreatedAndAddedToGpgpuCommandStreamReceiver) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableEotWa.set(true);

    // Shared verification for both device configurations.
    auto verifyEotWaResidency = [](MockDevice *device) {
        EXPECT_NE(nullptr, device->kernelEotWaAllocation);
        for (auto &engine : device->allEngines) {
            auto csr = static_cast<UltCommandStreamReceiver<FamilyType> *>(engine.commandStreamReceiver);
            if (EngineHelpers::isBcs(engine.getEngineType())) {
                EXPECT_TRUE(csr->additionalAllocationsForResidency.empty());
                continue;
            }
            // Fatal on purpose: indexing an empty container below would be undefined behavior.
            ASSERT_EQ(1u, csr->additionalAllocationsForResidency.size());
            EXPECT_EQ(device->kernelEotWaAllocation, csr->additionalAllocationsForResidency[0]);
        }
    };

    {
        UltDeviceFactory factory{1, 0};
        verifyEotWaResidency(factory.rootDevices[0]);
    }
    {
        UltDeviceFactory factory{1, 2};
        verifyEotWaResidency(factory.rootDevices[0]);
    }
}
// Creates a device through DeviceFactory with the OS-specific memory manager (mocked
// environment preparation and the OS-agnostic memory manager are both disabled) and
// checks the kernel EOT WA allocation: its GPU address, its size and the byte pattern
// stored in its CPU-visible buffer. Skipped on 32-bit builds.
HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceKernelWaTest, givenEnabledEotWaWhenCreatingDeviceThenKernelWaIsCreatedWithProperContentAndGpuAddress) {
    if (is32bit) {
        GTEST_SKIP();
    }
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableEotWa.set(true);
    DebugManager.flags.EnableLocalMemory.set(false);

    // Make the factory produce MockDevice instances so the protected WA allocation is reachable.
    VariableBackup<decltype(DeviceFactory::createRootDeviceFunc)> createRootDeviceFuncBackup{&DeviceFactory::createRootDeviceFunc};
    createRootDeviceFuncBackup = [](ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) -> std::unique_ptr<Device> {
        return std::unique_ptr<Device>(MockDevice::create<MockDevice>(&executionEnvironment, rootDeviceIndex));
    };

    VariableBackup<UltHwConfig> backup(&ultHwConfig);
    ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false;
    ultHwConfig.forceOsAgnosticMemoryManager = false;

    auto executionEnvironment = new MockExecutionEnvironment(defaultHwInfo.get(), true, 1);
    auto devices = DeviceFactory::createDevices(*executionEnvironment);
    // Fatal on purpose: devices[0] must not be touched when device creation failed.
    ASSERT_FALSE(devices.empty());

    auto memoryManager = executionEnvironment->memoryManager.get();
    auto device = static_cast<MockDevice *>(devices[0].get());
    // Fatal on purpose: the allocation is dereferenced by every check below.
    ASSERT_NE(nullptr, device->kernelEotWaAllocation);

    // Expected placement: the final 64KB page of the 4GB range starting at the internal heap base.
    auto heapBase = memoryManager->getGfxPartition(device->getRootDeviceIndex())->getHeapBase(HeapIndex::HEAP_INTERNAL);
    auto expectedGpuAddress = device->getGmmHelper()->canonize(heapBase + MemoryConstants::gigaByte * 4 - MemoryConstants::pageSize64k);
    EXPECT_EQ(device->kernelEotWaAllocation->getGpuAddress(), expectedGpuAddress);
    EXPECT_EQ(device->kernelEotWaAllocation->getUnderlyingBufferSize(), MemoryConstants::pageSize64k);

    auto cpuPtr = device->kernelEotWaAllocation->getUnderlyingBuffer();
    // NOTE(review): the pattern is 15 bytes long and is expected to end exactly where the last
    // 4KB page of the allocation begins - confirm this matches the layout written by Device.
    uint8_t eotMemoryPattern[]{0x09, 0x0C, 0x80, 0x04, 0x00, 0x00, 0x00, 0x0C, 0x7F, 0x20, 0x30, 0x00, 0x00, 0x00, 0x00};
    EXPECT_EQ(0, memcmp(ptrOffset(cpuPtr, MemoryConstants::pageSize64k - MemoryConstants::pageSize - sizeof(eotMemoryPattern)), eotMemoryPattern, sizeof(eotMemoryPattern)));
}
using DeviceTest = Test<DeviceFixture>;
TEST_F(DeviceTest, whenInitializeRayTracingIsCalledAndRtBackedBufferIsNullptrThenMemoryBackedBufferIsCreated) {

View File

@@ -10,6 +10,7 @@
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/os_interface/os_memory.h"
#include "shared/source/utilities/cpu_info.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_gfx_partition.h"
#include "gtest/gtest.h"
@@ -165,6 +166,24 @@ TEST(GfxPartitionTest, GivenFullRange48BitSvmWhenTestingGfxPartitionThenAllExpec
testGfxPartition(gfxPartition, gfxBase, gfxTop, gfxBase);
}
// With EnableEotWa set, both internal heaps are expected to be one 64KB page smaller than
// 4GB while both external heaps keep the full 4GB; checked for 48- and 57-bit ranges.
TEST(GfxPartitionTest, GivenEnabledEotWaWhenInitializingHeapsThenInternalHeapsHave4GBMinusOnePageRange) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableEotWa.set(true);

    const auto fullHeapSize = 4 * MemoryConstants::gigaByte;
    const auto reducedHeapSize = 4 * MemoryConstants::gigaByte - MemoryConstants::pageSize64k;

    for (const int gpuAddressBits : {48, 57}) {
        MockGfxPartition partition;
        partition.init(maxNBitValue(gpuAddressBits), reservedCpuAddressRangeSize, 0, 1);

        for (const auto internalHeap : {HeapIndex::HEAP_INTERNAL, HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY}) {
            EXPECT_EQ(partition.getHeapSize(internalHeap), reducedHeapSize);
        }
        for (const auto externalHeap : {HeapIndex::HEAP_EXTERNAL, HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY}) {
            EXPECT_EQ(partition.getHeapSize(externalHeap), fullHeapSize);
        }
    }
}
TEST(GfxPartitionTest, GivenFullRange47BitSvmWhenTestingGfxPartitionThenAllExpectationsAreMet) {
MockGfxPartition gfxPartition;
gfxPartition.init(maxNBitValue(47), reservedCpuAddressRangeSize, 0, 1);

View File

@@ -1512,6 +1512,20 @@ TEST_F(DrmMemoryManagerWithLocalMemoryTest, givenDrmMemoryManagerWithLocalMemory
memoryManager->freeGraphicsMemory(graphicsAllocation);
}
// Requests a KERNEL_ISA allocation with an explicit GPU address while local memory is
// enabled, then checks that the allocation reports that address and the LocalMemory pool.
TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, whenAllocatingKernelIsaWithSpecificGpuAddressThenThisAddressIsUsed) {
    const uint64_t requestedGpuAddress = 0xDEADBEEFu;
    const size_t allocationSize = 4096u;

    AllocationProperties isaProperties(rootDeviceIndex, true, allocationSize, AllocationType::KERNEL_ISA, false, device->getDeviceBitfield());
    isaProperties.gpuAddress = requestedGpuAddress;

    // NOTE(review): no DebugManagerStateRestore is visible in this test body - confirm the fixture restores this flag.
    DebugManager.flags.EnableLocalMemory.set(true);

    auto isaAllocation = memoryManager->allocateGraphicsMemoryWithProperties(isaProperties);
    ASSERT_NE(nullptr, isaAllocation);

    EXPECT_EQ(requestedGpuAddress, isaAllocation->getGpuAddress());
    EXPECT_EQ(MemoryPool::LocalMemory, isaAllocation->getMemoryPool());

    memoryManager->freeGraphicsMemory(isaAllocation);
}
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerAndOsHandleWhenCreateIsCalledAndRootDeviceIndexIsSpecifiedThenGraphicsAllocationIsReturnedWithCorrectRootDeviceIndex) {
mock->ioctl_expected.primeFdToHandle = 1;
mock->ioctl_expected.gemWait = 1;

View File

@@ -23,6 +23,7 @@ set(NEO_CORE_OS_INTERFACE_TESTS_WINDOWS
${CMAKE_CURRENT_SOURCE_DIR}/wddm_address_space_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/wddm_command_stream_l0_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/wddm_mapper_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager_with_localmem_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/wddm_preemption_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/wddm_shared_allocations_test.cpp
${CMAKE_CURRENT_SOURCE_DIR}/wddm_special_heap_test.cpp

View File

@@ -0,0 +1,47 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/helpers/ult_hw_config.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/test_macros/test.h"
using namespace NEO;
// Creates devices through DeviceFactory with the OS-specific memory manager and local
// memory enabled, requests a KERNEL_ISA allocation at an explicit GPU address (internal
// heap base + 1GB) and checks that the allocation reports the canonized form of that
// address and the LocalMemory pool. Skipped on 32-bit builds.
TEST(WddmMemoryManagerWithLocalMemoryTest, whenAllocatingKernelIsaWithSpecificGpuAddressThenThisAddressIsUsed) {
    if (is32bit) {
        GTEST_SKIP();
    }
    VariableBackup<UltHwConfig> backup(&ultHwConfig);
    ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false;
    ultHwConfig.forceOsAgnosticMemoryManager = false;

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableLocalMemory.set(true);

    auto executionEnvironment = new MockExecutionEnvironment(defaultHwInfo.get(), true, 1);
    auto devices = DeviceFactory::createDevices(*executionEnvironment);
    // Fatal on purpose: front() on an empty container would be undefined behavior.
    ASSERT_FALSE(devices.empty());

    auto memoryManager = executionEnvironment->memoryManager.get();
    auto &device = devices.front();

    uint64_t expectedGpuAddress = memoryManager->getInternalHeapBaseAddress(device->getRootDeviceIndex(), true) + MemoryConstants::gigaByte;
    size_t size = 4096u;
    AllocationProperties properties(device->getRootDeviceIndex(), true, size, AllocationType::KERNEL_ISA, false, device->getDeviceBitfield());
    properties.gpuAddress = expectedGpuAddress;

    auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);
    ASSERT_NE(nullptr, graphicsAllocation);

    EXPECT_EQ(device->getGmmHelper()->canonize(expectedGpuAddress), graphicsAllocation->getGpuAddress());
    EXPECT_EQ(MemoryPool::LocalMemory, graphicsAllocation->getMemoryPool());

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}