Add tweaks and control flags to Linux completion fence

Related-To: NEO-6575

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-01-20 18:13:07 +00:00
committed by Compute-Runtime-Automation
parent fdef257b01
commit a7455b5767
26 changed files with 407 additions and 34 deletions

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2018-2021 Intel Corporation
# Copyright (C) 2018-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -65,6 +65,12 @@ if("${BRANCH_TYPE}" STREQUAL "")
)
endif()
if(TESTS_XEHP_AND_LATER)
list(APPEND IGDRCL_SRCS_tests_os_interface_linux
${CMAKE_CURRENT_SOURCE_DIR}/drm_command_stream_xehp_and_later_tests.cpp
)
endif()
if(UNIX)
target_sources(igdrcl_tests PRIVATE ${IGDRCL_SRCS_tests_os_interface_linux})
endif()

View File

@@ -95,8 +95,10 @@ HWTEST_F(DrmCommandStreamMMTest, givenExecutionEnvironmentWithMoreThanOneRootDev
}
}
HWTEST_TEMPLATED_F(DrmCommandStreamMemExecTest, GivenDrmSupportsCompletionFenceWhenCallingCsrExecThenTagAllocationIsPassed) {
HWTEST_TEMPLATED_F(DrmCommandStreamMemExecTest, GivenDrmSupportsVmBindAndCompletionFenceWhenCallingCsrExecThenTagAllocationIsPassed) {
mock->completionFenceSupported = true;
mock->isVmBindAvailableCall.callParent = false;
mock->isVmBindAvailableCall.returnValue = true;
TestedBufferObject bo(mock, 128);
MockDrmAllocation cmdBuffer(GraphicsAllocation::AllocationType::COMMAND_BUFFER, MemoryPool::System4KBPages);
@@ -118,7 +120,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamMemExecTest, GivenDrmSupportsCompletionFenceW
auto *testCsr = static_cast<TestedDrmCommandStreamReceiver<FamilyType> *>(csr);
testCsr->latestSentTaskCount = 2;
int ret = testCsr->exec(batchBuffer, 1, 2);
int ret = testCsr->exec(batchBuffer, 1, 2, 0);
EXPECT_EQ(0, ret);
EXPECT_EQ(expectedCompletionGpuAddress, bo.receivedCompletionGpuAddress);
@@ -126,3 +128,73 @@ HWTEST_TEMPLATED_F(DrmCommandStreamMemExecTest, GivenDrmSupportsCompletionFenceW
mm->freeGraphicsMemory(allocation);
}
HWTEST_TEMPLATED_F(DrmCommandStreamMemExecTest, GivenDrmSupportsVmBindAndNotCompletionFenceWhenCallingCsrExecThenTagAllocationIsNotPassed) {
mock->completionFenceSupported = false;
mock->isVmBindAvailableCall.callParent = false;
mock->isVmBindAvailableCall.returnValue = true;
TestedBufferObject bo(mock, 128);
MockDrmAllocation cmdBuffer(GraphicsAllocation::AllocationType::COMMAND_BUFFER, MemoryPool::System4KBPages);
cmdBuffer.bufferObjects[0] = &bo;
uint8_t buff[128];
LinearStream cs(&cmdBuffer, buff, 128);
CommandStreamReceiverHw<FamilyType>::addBatchBufferEnd(cs, nullptr);
EncodeNoop<FamilyType>::alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};
auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
csr->makeResident(cmdBuffer);
csr->makeResident(*allocation);
csr->makeResident(*csr->getTagAllocation());
constexpr uint64_t expectedCompletionGpuAddress = 0;
constexpr uint32_t expectedCompletionValue = 0;
auto *testCsr = static_cast<TestedDrmCommandStreamReceiver<FamilyType> *>(csr);
testCsr->latestSentTaskCount = 2;
int ret = testCsr->exec(batchBuffer, 1, 2, 0);
EXPECT_EQ(0, ret);
EXPECT_EQ(expectedCompletionGpuAddress, bo.receivedCompletionGpuAddress);
EXPECT_EQ(expectedCompletionValue, bo.receivedCompletionValue);
mm->freeGraphicsMemory(allocation);
}
HWTEST_TEMPLATED_F(DrmCommandStreamMemExecTest, GivenDrmSupportsCompletionFenceAndNotVmBindWhenCallingCsrExecThenTagAllocationIsNotPassed) {
mock->completionFenceSupported = true;
mock->isVmBindAvailableCall.callParent = false;
mock->isVmBindAvailableCall.returnValue = false;
TestedBufferObject bo(mock, 128);
MockDrmAllocation cmdBuffer(GraphicsAllocation::AllocationType::COMMAND_BUFFER, MemoryPool::System4KBPages);
cmdBuffer.bufferObjects[0] = &bo;
uint8_t buff[128];
LinearStream cs(&cmdBuffer, buff, 128);
CommandStreamReceiverHw<FamilyType>::addBatchBufferEnd(cs, nullptr);
EncodeNoop<FamilyType>::alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};
auto allocation = mm->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});
csr->makeResident(cmdBuffer);
csr->makeResident(*allocation);
csr->makeResident(*csr->getTagAllocation());
constexpr uint64_t expectedCompletionGpuAddress = 0;
constexpr uint32_t expectedCompletionValue = 0;
auto *testCsr = static_cast<TestedDrmCommandStreamReceiver<FamilyType> *>(csr);
testCsr->latestSentTaskCount = 2;
int ret = testCsr->exec(batchBuffer, 1, 2, 0);
EXPECT_EQ(0, ret);
EXPECT_EQ(expectedCompletionGpuAddress, bo.receivedCompletionGpuAddress);
EXPECT_EQ(expectedCompletionValue, bo.receivedCompletionValue);
mm->freeGraphicsMemory(allocation);
}

View File

@@ -0,0 +1,182 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/os_interface/linux/drm_command_stream.h"
#include "shared/source/os_interface/linux/drm_memory_manager.h"
#include "shared/source/os_interface/linux/drm_memory_operations_handler.h"
#include "shared/source/os_interface/linux/os_context_linux.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/libult/linux/drm_mock.h"
#include "shared/test/common/mocks/linux/mock_drm_allocation.h"
#include "shared/test/common/mocks/linux/mock_drm_memory_manager.h"
#include "shared/test/common/mocks/mock_allocation_properties.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/os_interface/linux/device_command_stream_fixture.h"
#include "shared/test/common/os_interface/linux/drm_buffer_object_fixture.h"
#include "shared/test/common/test_macros/test.h"
#include "opencl/test/unit_test/os_interface/linux/drm_command_stream_fixture.h"
using namespace NEO;
struct DrmCommandStreamMultiTileMemExecFixture {
void SetUp() {
DebugManager.flags.CreateMultipleSubDevices.set(2u);
DebugManager.flags.EnableImplicitScaling.set(1);
DebugManager.flags.EnableForcePin.set(false);
osLocalMemoryBackup = std::make_unique<VariableBackup<bool>>(&OSInterface::osEnableLocalMemory, true);
executionEnvironment = new MockExecutionEnvironment();
executionEnvironment->incRefInternal();
executionEnvironment->initGmm();
mock = new DrmMockCustom(*executionEnvironment->rootDeviceEnvironments[0]);
executionEnvironment->rootDeviceEnvironments[0]->osInterface = std::make_unique<OSInterface>();
executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr<DriverModel>(mock));
executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = DrmMemoryOperationsHandler::create(*mock, 0);
memoryManager = new DrmMemoryManager(gemCloseWorkerMode::gemCloseWorkerInactive,
DebugManager.flags.EnableForcePin.get(),
true,
*executionEnvironment);
executionEnvironment->memoryManager.reset(memoryManager);
executionEnvironment->prepareRootDeviceEnvironments(1u);
executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(NEO::defaultHwInfo.get());
executionEnvironment->initializeMemoryManager();
device.reset(MockDevice::create<MockDevice>(executionEnvironment, 0));
osContext = std::make_unique<OsContextLinux>(*mock, 0u, EngineDescriptorHelper::getDefaultDescriptor(device->getDeviceBitfield()));
osContext->ensureContextInitialized();
}
void TearDown() {
executionEnvironment->decRefInternal();
}
DebugManagerStateRestore dbgRestore;
std::unique_ptr<VariableBackup<bool>> osLocalMemoryBackup;
std::unique_ptr<MockDevice> device;
std::unique_ptr<OsContext> osContext;
MockExecutionEnvironment *executionEnvironment = nullptr;
DrmMockCustom *mock = nullptr;
DrmMemoryManager *memoryManager = nullptr;
};
using DrmCommandStreamMultiTileMemExecTest = Test<DrmCommandStreamMultiTileMemExecFixture>;
HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSupportsCompletionFenceAndVmBindWhenCallingCsrExecThenMultipleTagAllocationIsPassed) {
auto *testCsr = new TestedDrmCommandStreamReceiver<FamilyType>(*executionEnvironment, 0, device->getDeviceBitfield());
device->resetCommandStreamReceiver(testCsr);
EXPECT_EQ(2u, testCsr->activePartitions);
testCsr->setupContext(*osContext.get());
mock->completionFenceSupported = true;
mock->isVmBindAvailableCall.callParent = false;
mock->isVmBindAvailableCall.returnValue = true;
TestedBufferObject bo(mock, 128);
MockDrmAllocation cmdBuffer(GraphicsAllocation::AllocationType::COMMAND_BUFFER, MemoryPool::System4KBPages);
cmdBuffer.bufferObjects[0] = &bo;
uint8_t buff[128];
LinearStream cs(&cmdBuffer, buff, 128);
CommandStreamReceiverHw<FamilyType>::addBatchBufferEnd(cs, nullptr);
EncodeNoop<FamilyType>::alignToCacheLine(cs);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false};
auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{testCsr->getRootDeviceIndex(), MemoryConstants::pageSize});
testCsr->makeResident(cmdBuffer);
testCsr->makeResident(*allocation);
testCsr->makeResident(*testCsr->getTagAllocation());
testCsr->latestSentTaskCount = 2;
testCsr->postSyncWriteOffset = 16;
uint64_t expectedCompletionGpuAddress = testCsr->getTagAllocation()->getGpuAddress() + Drm::completionFenceOffset + testCsr->postSyncWriteOffset;
int ret = testCsr->flushInternal(batchBuffer, testCsr->getResidencyAllocations());
EXPECT_EQ(0, ret);
EXPECT_EQ(expectedCompletionGpuAddress, bo.receivedCompletionGpuAddress);
EXPECT_EQ(testCsr->latestSentTaskCount, bo.receivedCompletionValue);
EXPECT_EQ(2u, bo.execCalled);
memoryManager->freeGraphicsMemory(allocation);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSupportsCompletionFenceAndVmBindWhenHandlingCompletionThenExpectMultipleWaitCalls) {
EngineControl &defaultEngine = device->getDefaultEngine();
EXPECT_EQ(2u, defaultEngine.commandStreamReceiver->getActivePartitions());
uint32_t postSyncOffset = defaultEngine.commandStreamReceiver->getPostSyncWriteOffset();
EXPECT_NE(0u, postSyncOffset);
mock->completionFenceSupported = true;
mock->isVmBindAvailableCall.callParent = false;
mock->isVmBindAvailableCall.returnValue = true;
auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, 1024, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
allocation->updateTaskCount(2, defaultEngine.osContext->getContextId());
volatile uint32_t *completionAddress = defaultEngine.commandStreamReceiver->getTagAddress();
completionAddress += (Drm::completionFenceOffset / sizeof(uint32_t));
*completionAddress = 1;
completionAddress += (postSyncOffset / sizeof(uint32_t));
*completionAddress = 1;
memoryManager->handleFenceCompletion(allocation);
uint64_t expectedAddress = castToUint64(const_cast<uint32_t *>(defaultEngine.commandStreamReceiver->getTagAddress())) +
Drm::completionFenceOffset +
postSyncOffset;
constexpr uint64_t expectedValue = 2;
EXPECT_EQ(2u, mock->waitUserFenceCall.called);
EXPECT_EQ(expectedAddress, mock->waitUserFenceCall.address);
EXPECT_EQ(expectedValue, mock->waitUserFenceCall.value);
memoryManager->freeGraphicsMemory(allocation);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSupportsCompletionFenceAndVmBindWhenHandlingCompletionAndOneContextIsReadyThenExpectOneWaitCall) {
EngineControl &defaultEngine = device->getDefaultEngine();
EXPECT_EQ(2u, defaultEngine.commandStreamReceiver->getActivePartitions());
uint32_t postSyncOffset = defaultEngine.commandStreamReceiver->getPostSyncWriteOffset();
EXPECT_NE(0u, postSyncOffset);
mock->completionFenceSupported = true;
mock->isVmBindAvailableCall.callParent = false;
mock->isVmBindAvailableCall.returnValue = true;
auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{0, 1024, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
allocation->updateTaskCount(2, defaultEngine.osContext->getContextId());
volatile uint32_t *completionAddress = defaultEngine.commandStreamReceiver->getTagAddress();
completionAddress += (Drm::completionFenceOffset / sizeof(uint32_t));
*completionAddress = 2; //1st context is ready
completionAddress += (postSyncOffset / sizeof(uint32_t));
*completionAddress = 1;
memoryManager->handleFenceCompletion(allocation);
uint64_t expectedAddress = castToUint64(const_cast<uint32_t *>(defaultEngine.commandStreamReceiver->getTagAddress())) +
Drm::completionFenceOffset +
postSyncOffset;
constexpr uint64_t expectedValue = 2;
EXPECT_EQ(1u, mock->waitUserFenceCall.called);
EXPECT_EQ(expectedAddress, mock->waitUserFenceCall.address);
EXPECT_EQ(expectedValue, mock->waitUserFenceCall.value);
memoryManager->freeGraphicsMemory(allocation);
}

View File

@@ -5809,14 +5809,29 @@ TEST_F(DrmMemoryManagerTest, GivenEligbleAllocationTypeWhenCheckingAllocationEli
}
TEST_F(DrmMemoryManagerTest, GivenNotEligbleAllocationTypeWhenCheckingAllocationEligbleForCompletionFenceThenReturnFalse) {
GraphicsAllocation::AllocationType validAllocations[] = {
GraphicsAllocation::AllocationType invalidAllocations[] = {
GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY,
GraphicsAllocation::AllocationType::CONSTANT_SURFACE,
GraphicsAllocation::AllocationType::FILL_PATTERN,
GraphicsAllocation::AllocationType::GLOBAL_SURFACE};
for (size_t i = 0; i < 4; i++) {
EXPECT_FALSE(memoryManager->allocationTypeForCompletionFence(validAllocations[i]));
EXPECT_FALSE(memoryManager->allocationTypeForCompletionFence(invalidAllocations[i]));
}
}
TEST_F(DrmMemoryManagerTest, GivenNotEligbleAllocationTypeAndDebugFlagOverridingWhenCheckingAllocationEligbleForCompletionFenceThenReturnTrue) {
DebugManagerStateRestore dbgState;
DebugManager.flags.UseDrmCompletionFenceForAllAllocations.set(1);
GraphicsAllocation::AllocationType invalidAllocations[] = {
GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY,
GraphicsAllocation::AllocationType::CONSTANT_SURFACE,
GraphicsAllocation::AllocationType::FILL_PATTERN,
GraphicsAllocation::AllocationType::GLOBAL_SURFACE};
for (size_t i = 0; i < 4; i++) {
EXPECT_TRUE(memoryManager->allocationTypeForCompletionFence(invalidAllocations[i]));
}
}
@@ -5877,4 +5892,27 @@ TEST_F(DrmMemoryManagerTest, givenCompletionFenceEnabledWhenHandlingCompletionOf
memoryManager->freeGraphicsMemory(allocation);
}
HWTEST_F(DrmMemoryManagerTest, givenCompletionFenceEnabledWhenHandlingCompletionAndTagAddressIsNullThenDoNotCallWaitUserFence) {
mock->ioctl_expected.total = -1;
VariableBackup<bool> backupFenceSupported{&mock->completionFenceSupported, true};
VariableBackup<bool> backupVmBindCallParent{&mock->isVmBindAvailableCall.callParent, false};
VariableBackup<bool> backupVmBindReturnValue{&mock->isVmBindAvailableCall.returnValue, true};
auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, 1024, GraphicsAllocation::AllocationType::COMMAND_BUFFER});
auto engine = memoryManager->getRegisteredEngines()[0];
allocation->updateTaskCount(2, engine.osContext->getContextId());
auto testCsr = static_cast<TestedDrmCommandStreamReceiver<FamilyType> *>(engine.commandStreamReceiver);
auto backupTagAddress = testCsr->tagAddress;
testCsr->tagAddress = nullptr;
memoryManager->handleFenceCompletion(allocation);
EXPECT_EQ(0u, mock->waitUserFenceCall.called);
testCsr->tagAddress = backupTagAddress;
memoryManager->freeGraphicsMemory(allocation);
}
} // namespace NEO

View File

@@ -944,14 +944,23 @@ TEST(DrmTest, GivenCompletionFenceDebugFlagWhenCreatingDrmObjectThenExpectCorrec
auto executionEnvironment = std::make_unique<ExecutionEnvironment>();
executionEnvironment->prepareRootDeviceEnvironments(1);
HardwareInfo *hwInfo = defaultHwInfo.get();
executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(hwInfo);
auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily);
DrmMock drmDefault{*executionEnvironment->rootDeviceEnvironments[0]};
EXPECT_FALSE(drmDefault.completionFenceSupported);
if (hwHelper.isLinuxCompletionFenceSupported()) {
EXPECT_TRUE(drmDefault.completionFenceSupport());
} else {
EXPECT_FALSE(drmDefault.completionFenceSupport());
}
DebugManager.flags.EnableDrmCompletionFence.set(1);
DrmMock drmEnabled{*executionEnvironment->rootDeviceEnvironments[0]};
EXPECT_TRUE(drmEnabled.completionFenceSupported);
EXPECT_TRUE(drmEnabled.completionFenceSupport());
DebugManager.flags.EnableDrmCompletionFence.set(0);
DrmMock drmDisabled{*executionEnvironment->rootDeviceEnvironments[0]};
EXPECT_FALSE(drmDisabled.completionFenceSupported);
EXPECT_FALSE(drmDisabled.completionFenceSupport());
}

View File

@@ -365,6 +365,7 @@ UpdateCrossThreadDataSize = 0
ForceBcsEngineIndex = -1
ResolveDependenciesViaPipeControls = -1
EnableDrmCompletionFence = -1
UseDrmCompletionFenceForAllAllocations = -1
ExperimentalEnableSourceLevelDebugger = 0
Force2dImageAsArray = -1
ForceExtendedBufferSize = -1