mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 23:03:02 +08:00
Add debug flag for EOT WA
The EOT WA requires allocating the last 64KB of the kernel heap and placing the EOT signature in the last 16 bytes of the kernel heap. Related-To: NEO-7099. Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
9a667308b9
commit
cf3817e058
@@ -65,6 +65,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::wasSubmittedToSingleSubdevice;
|
||||
using BaseClass::CommandStreamReceiver::activePartitions;
|
||||
using BaseClass::CommandStreamReceiver::activePartitionsConfig;
|
||||
using BaseClass::CommandStreamReceiver::additionalAllocationsForResidency;
|
||||
using BaseClass::CommandStreamReceiver::baseWaitFunction;
|
||||
using BaseClass::CommandStreamReceiver::bindingTableBaseAddressRequired;
|
||||
using BaseClass::CommandStreamReceiver::canUse4GbHeaps;
|
||||
|
||||
@@ -388,11 +388,13 @@ NTSTATUS __stdcall D3DKMTQueryResourceInfoFromNtHandle(IN OUT D3DKMT_QUERYRESOUR
|
||||
return STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// 128KB (0x20000 bytes) zero-initialized buffer; D3DKMTLock2 below hands out a pointer into it.
uint8_t lockedData[0x20000]{};
|
||||
|
||||
// Stub of D3DKMTLock2: validates the handles and reports a CPU pointer for the lock request.
//
// Returns STATUS_INVALID_PARAMETER when either lock2->hAllocation or lock2->hDevice is 0.
// Otherwise stores a 64KB-aligned address inside lockedData in lock2->pData and returns
// STATUS_SUCCESS.
NTSTATUS __stdcall D3DKMTLock2(IN OUT D3DKMT_LOCK2 *lock2) {
    if (lock2->hAllocation == 0 || lock2->hDevice == 0) {
        return STATUS_INVALID_PARAMETER;
    }

    // Step 64KB into the 128KB buffer and round down to a 64KB boundary: the result always lies
    // inside lockedData and leaves a full 64KB of the buffer behind it.
    // The mask has to be ~0xFFFF. The previous -0xFFFF evaluates to ...FFFF0001, which keeps
    // bit 0 and therefore yields an unaligned pointer whenever lockedData has an odd address.
    constexpr uintptr_t alignment64k = 0x10000;
    const auto lockedDataAddress = reinterpret_cast<uintptr_t>(lockedData);
    lock2->pData = reinterpret_cast<void *>((lockedDataAddress + alignment64k) & ~(alignment64k - 1));
    return STATUS_SUCCESS;
}
|
||||
|
||||
|
||||
@@ -61,6 +61,7 @@ class MockDevice : public RootDevice {
|
||||
using Device::getGlobalMemorySize;
|
||||
using Device::initializeCaps;
|
||||
using Device::isDebuggerActive;
|
||||
using Device::kernelEotWaAllocation;
|
||||
using Device::regularEngineGroups;
|
||||
using Device::rootCsrCreated;
|
||||
using Device::rtMemoryBackedBuffer;
|
||||
|
||||
@@ -271,6 +271,7 @@ CFEMaximumNumberOfThreads = -1
|
||||
CFEOverDispatchControl = -1
|
||||
CFELargeGRFThreadAdjustDisable = -1
|
||||
SynchronizeWalkerInWparidMode = -1
|
||||
EnableEotWa = 0
|
||||
EnableWalkerPartition = -1
|
||||
OverrideNumComputeUnitsForScratch = -1
|
||||
ForceThreadGroupDispatchSize = -1
|
||||
|
||||
@@ -98,6 +98,18 @@ HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenTimestampTypeIs32b) {
|
||||
EXPECT_EQ(expectedOffset, tag->getGlobalStartOffset());
|
||||
}
|
||||
|
||||
// Verifies that addAdditionalAllocationForResidency() stores the given allocation in the CSR's
// additionalAllocationsForResidency container, which starts out empty.
HWTEST_F(CommandStreamReceiverTest, whenAddingAdditionalAllocationForResidencyThenItIsRegisteredInCsr) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    EXPECT_TRUE(csr.additionalAllocationsForResidency.empty());

    MockGraphicsAllocation allocation{};
    csr.addAdditionalAllocationForResidency(&allocation);

    // Fatal on purpose: element 0 is read on the next line, which is only valid when the
    // container is non-empty.
    ASSERT_EQ(1u, csr.additionalAllocationsForResidency.size());
    EXPECT_EQ(&allocation, csr.additionalAllocationsForResidency[0]);
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenFlagsAreSetCorrectly) {
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.initProgrammingFlags();
|
||||
@@ -2074,4 +2086,4 @@ HWTEST_F(CommandStreamReceiverTest, givenMultipleActivePartitionsWhenWaitLogIsEn
|
||||
<< " " << tagValue << std::endl;
|
||||
|
||||
EXPECT_STREQ(expectedOutput.str().c_str(), output.c_str());
|
||||
}
|
||||
}
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/os_interface/device_factory.h"
|
||||
#include "shared/test/common/fixtures/device_fixture.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/default_hw_info.h"
|
||||
@@ -13,8 +14,10 @@
|
||||
#include "shared/test/common/mocks/mock_builtins.h"
|
||||
#include "shared/test/common/mocks/mock_compiler_interface.h"
|
||||
#include "shared/test/common/mocks/mock_compilers.h"
|
||||
#include "shared/test/common/mocks/mock_csr.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/mocks/ult_device_factory.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
using namespace NEO;
|
||||
@@ -46,6 +49,84 @@ TEST(Device, givenNoDebuggerWhenGettingDebuggerThenNullptrIsReturned) {
|
||||
EXPECT_EQ(nullptr, device->getSourceLevelDebugger());
|
||||
}
|
||||
|
||||
// Plain gtest fixture (no custom set-up) used by the EOT WA device tests below.
using DeviceKernelWaTest = ::testing::Test;
|
||||
|
||||
// With EnableEotWa set, a created root device must own a kernelEotWaAllocation, and every
// non-BCS engine's CSR must hold exactly that allocation as its only additional residency
// allocation; BCS engines must hold none.
// The same checks run for two factory configurations: UltDeviceFactory{1, 0} and
// UltDeviceFactory{1, 2} (second argument presumably the sub-device count -- confirm against
// UltDeviceFactory).
HWTEST_F(DeviceKernelWaTest, givenEnabledEotWaWhenCreatingDeviceThenKernelWaIsCreatedAndAddedToGpgpuCommandStreamReceiver) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableEotWa.set(true);

    const uint32_t subDevicesCounts[] = {0u, 2u};
    for (const auto subDevicesCount : subDevicesCounts) {
        UltDeviceFactory factory{1, subDevicesCount};
        auto device = factory.rootDevices[0];

        EXPECT_NE(nullptr, device->kernelEotWaAllocation);

        for (auto &engine : device->allEngines) {
            auto csr = static_cast<UltCommandStreamReceiver<FamilyType> *>(engine.commandStreamReceiver);

            if (EngineHelpers::isBcs(engine.getEngineType())) {
                EXPECT_TRUE(csr->additionalAllocationsForResidency.empty());
                continue;
            }

            // Fatal on purpose: element 0 is read right after, which requires a non-empty container.
            ASSERT_EQ(1u, csr->additionalAllocationsForResidency.size());
            EXPECT_EQ(device->kernelEotWaAllocation, csr->additionalAllocationsForResidency[0]);
        }
    }
}
|
||||
|
||||
// With EnableEotWa set (and local memory disabled), checks the allocation created for the EOT WA:
//  - GPU address == canonized (internal heap base + 4GB - 64KB),
//  - underlying buffer size == 64KB,
//  - the expected byte pattern is present in the CPU-visible buffer.
// Skipped on 32-bit builds.
HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceKernelWaTest, givenEnabledEotWaWhenCreatingDeviceThenKernelWaIsCreatedWithProperContentAndGpuAddress) {
    if (is32bit) {
        GTEST_SKIP();
    }
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableEotWa.set(true);
    DebugManager.flags.EnableLocalMemory.set(false);

    // Make DeviceFactory build MockDevice instances so protected members are reachable below.
    VariableBackup<decltype(DeviceFactory::createRootDeviceFunc)> createRootDeviceFuncBackup{&DeviceFactory::createRootDeviceFunc};
    createRootDeviceFuncBackup = [](ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex) -> std::unique_ptr<Device> {
        return std::unique_ptr<Device>(MockDevice::create<MockDevice>(&executionEnvironment, rootDeviceIndex));
    };
    VariableBackup<UltHwConfig> backup(&ultHwConfig);
    ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false;
    ultHwConfig.forceOsAgnosticMemoryManager = false;

    // NOTE(review): raw new -- presumably the created devices keep the environment alive and
    // release it; confirm.
    auto executionEnvironment = new MockExecutionEnvironment(defaultHwInfo.get(), true, 1);
    auto devices = DeviceFactory::createDevices(*executionEnvironment);
    // Fatal on purpose: devices[0] is dereferenced below.
    ASSERT_FALSE(devices.empty());
    auto memoryManager = executionEnvironment->memoryManager.get();

    auto device = static_cast<MockDevice *>(devices[0].get());

    // Fatal on purpose: the allocation is dereferenced by every following expectation.
    ASSERT_NE(nullptr, device->kernelEotWaAllocation);

    auto heapBase = memoryManager->getGfxPartition(device->getRootDeviceIndex())->getHeapBase(HeapIndex::HEAP_INTERNAL);
    auto expectedGpuAddress = device->getGmmHelper()->canonize(heapBase + MemoryConstants::gigaByte * 4 - MemoryConstants::pageSize64k);

    EXPECT_EQ(device->kernelEotWaAllocation->getGpuAddress(), expectedGpuAddress);
    EXPECT_EQ(device->kernelEotWaAllocation->getUnderlyingBufferSize(), MemoryConstants::pageSize64k);

    auto cpuPtr = device->kernelEotWaAllocation->getUnderlyingBuffer();
    // NOTE(review): this pattern holds 15 bytes and is compared at
    // 64KB - pageSize - sizeof(pattern), i.e. not at the very end of the allocation.
    // Confirm both against the place where Device writes the EOT signature.
    uint8_t eotMemoryPattern[]{0x09, 0x0C, 0x80, 0x04, 0x00, 0x00, 0x00, 0x0C, 0x7F, 0x20, 0x30, 0x00, 0x00, 0x00, 0x00};

    EXPECT_EQ(0, memcmp(ptrOffset(cpuPtr, MemoryConstants::pageSize64k - MemoryConstants::pageSize - sizeof(eotMemoryPattern)), eotMemoryPattern, sizeof(eotMemoryPattern)));
}
|
||||
|
||||
using DeviceTest = Test<DeviceFixture>;
|
||||
|
||||
TEST_F(DeviceTest, whenInitializeRayTracingIsCalledAndRtBackedBufferIsNullptrThenMemoryBackedBufferIsCreated) {
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/os_interface/os_memory.h"
|
||||
#include "shared/source/utilities/cpu_info.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/mocks/mock_gfx_partition.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
@@ -165,6 +166,24 @@ TEST(GfxPartitionTest, GivenFullRange48BitSvmWhenTestingGfxPartitionThenAllExpec
|
||||
testGfxPartition(gfxPartition, gfxBase, gfxTop, gfxBase);
|
||||
}
|
||||
|
||||
// With EnableEotWa set, both internal heaps must report a size of 4GB minus one 64KB page,
// while both external heaps keep the full 4GB. Verified for 48-bit and 57-bit address ranges.
TEST(GfxPartitionTest, GivenEnabledEotWaWhenInitializingHeapsThenInternalHeapsHave4GBMinusOnePageRange) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableEotWa.set(true);

    for (auto &addressBits : {48, 57}) {
        MockGfxPartition partition;
        partition.init(maxNBitValue(addressBits), reservedCpuAddressRangeSize, 0, 1);

        // Internal heaps: shortened by one 64KB page.
        auto reducedHeapSize = 4 * MemoryConstants::gigaByte - MemoryConstants::pageSize64k;
        EXPECT_EQ(partition.getHeapSize(HeapIndex::HEAP_INTERNAL), reducedHeapSize);
        EXPECT_EQ(partition.getHeapSize(HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY), reducedHeapSize);

        // External heaps: untouched.
        auto untouchedHeapSize = 4 * MemoryConstants::gigaByte;
        EXPECT_EQ(partition.getHeapSize(HeapIndex::HEAP_EXTERNAL), untouchedHeapSize);
        EXPECT_EQ(partition.getHeapSize(HeapIndex::HEAP_EXTERNAL_DEVICE_MEMORY), untouchedHeapSize);
    }
}
|
||||
|
||||
TEST(GfxPartitionTest, GivenFullRange47BitSvmWhenTestingGfxPartitionThenAllExpectationsAreMet) {
|
||||
MockGfxPartition gfxPartition;
|
||||
gfxPartition.init(maxNBitValue(47), reservedCpuAddressRangeSize, 0, 1);
|
||||
|
||||
@@ -1512,6 +1512,20 @@ TEST_F(DrmMemoryManagerWithLocalMemoryTest, givenDrmMemoryManagerWithLocalMemory
|
||||
memoryManager->freeGraphicsMemory(graphicsAllocation);
|
||||
}
|
||||
|
||||
// A KERNEL_ISA allocation requested with an explicit gpuAddress (local memory enabled) must be
// created at exactly that GPU address and report the LocalMemory pool.
TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, whenAllocatingKernelIsaWithSpecificGpuAddressThenThisAddressIsUsed) {
    uint64_t requestedGpuAddress = 0xDEADBEEFu;
    size_t allocationSize = 4096u;

    AllocationProperties isaProperties(rootDeviceIndex, true, allocationSize, AllocationType::KERNEL_ISA, false, device->getDeviceBitfield());
    isaProperties.gpuAddress = requestedGpuAddress;
    DebugManager.flags.EnableLocalMemory.set(true);

    auto isaAllocation = memoryManager->allocateGraphicsMemoryWithProperties(isaProperties);
    // Fatal: the allocation is dereferenced by the expectations that follow.
    ASSERT_NE(nullptr, isaAllocation);

    EXPECT_EQ(requestedGpuAddress, isaAllocation->getGpuAddress());
    EXPECT_EQ(MemoryPool::LocalMemory, isaAllocation->getMemoryPool());

    memoryManager->freeGraphicsMemory(isaAllocation);
}
|
||||
|
||||
TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerAndOsHandleWhenCreateIsCalledAndRootDeviceIndexIsSpecifiedThenGraphicsAllocationIsReturnedWithCorrectRootDeviceIndex) {
|
||||
mock->ioctl_expected.primeFdToHandle = 1;
|
||||
mock->ioctl_expected.gemWait = 1;
|
||||
|
||||
@@ -23,6 +23,7 @@ set(NEO_CORE_OS_INTERFACE_TESTS_WINDOWS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/wddm_address_space_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/wddm_command_stream_l0_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/wddm_mapper_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/wddm_memory_manager_with_localmem_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/wddm_preemption_tests.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/wddm_shared_allocations_test.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/wddm_special_heap_test.cpp
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/os_interface/device_factory.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/default_hw_info.h"
|
||||
#include "shared/test/common/helpers/ult_hw_config.h"
|
||||
#include "shared/test/common/helpers/variable_backup.h"
|
||||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
// A KERNEL_ISA allocation requested with an explicit gpuAddress (internal heap base + 1GB, local
// memory enabled) must be created at the canonized form of that address and report the
// LocalMemory pool. Skipped on 32-bit builds.
TEST(WddmMemoryManagerWithLocalMemoryTest, whenAllocatingKernelIsaWithSpecificGpuAddressThenThisAddressIsUsed) {
    if (is32bit) {
        GTEST_SKIP();
    }
    VariableBackup<UltHwConfig> backup(&ultHwConfig);
    ultHwConfig.useMockedPrepareDeviceEnvironmentsFunc = false;
    ultHwConfig.forceOsAgnosticMemoryManager = false;

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableLocalMemory.set(true);

    // NOTE(review): raw new -- presumably the created devices keep the environment alive and
    // release it; confirm.
    auto executionEnvironment = new MockExecutionEnvironment(defaultHwInfo.get(), true, 1);
    auto devices = DeviceFactory::createDevices(*executionEnvironment);
    // Fatal on purpose: front() on an empty vector is undefined behavior.
    ASSERT_FALSE(devices.empty());
    auto memoryManager = executionEnvironment->memoryManager.get();
    auto &device = devices.front();

    uint64_t expectedGpuAddress = memoryManager->getInternalHeapBaseAddress(device->getRootDeviceIndex(), true) + MemoryConstants::gigaByte;
    size_t size = 4096u;

    AllocationProperties properties(device->getRootDeviceIndex(), true, size, AllocationType::KERNEL_ISA, false, device->getDeviceBitfield());
    properties.gpuAddress = expectedGpuAddress;

    auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);
    // Fatal: the allocation is dereferenced by the expectations that follow.
    ASSERT_NE(nullptr, graphicsAllocation);

    EXPECT_EQ(device->getGmmHelper()->canonize(expectedGpuAddress), graphicsAllocation->getGpuAddress());
    EXPECT_EQ(MemoryPool::LocalMemory, graphicsAllocation->getMemoryPool());

    memoryManager->freeGraphicsMemory(graphicsAllocation);
}
|
||||
Reference in New Issue
Block a user