performance: Use lock pointer copy with sfence for dc flush mitigation

Resolves: NEO-12898

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
Lukasz Jobczyk 2024-10-25 12:56:15 +00:00 committed by Compute-Runtime-Automation
parent 2d6fb1af4b
commit 8c3c703ec0
5 changed files with 27 additions and 21 deletions
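
What the change does, in short: on platforms where the DC flush is mitigated, buffer creation with CL_MEM_COPY_HOST_PTR no longer forces the host-pointer copy onto the GPU; it keeps the cheaper CPU copy through the locked pointer and issues a store fence afterwards. A minimal sketch of the pattern with simplified, hypothetical names (copyThroughLockedPointer and dcFlushMitigated are illustrative, not the NEO identifiers):

    #include <cstddef>
    #include <cstring>
    #include <xmmintrin.h> // _mm_sfence

    // Illustrative only: copy host data into the CPU-mapped (locked) allocation,
    // then fence so the stores are globally visible before the GPU touches it,
    // instead of routing the copy through the GPU to avoid a DC flush.
    void copyThroughLockedPointer(void *lockedPtr, const void *hostPtr, std::size_t size, bool dcFlushMitigated) {
        std::memcpy(lockedPtr, hostPtr, size); // CPU-side copy into the locked pointer
        if (dcFlushMitigated) {
            _mm_sfence(); // store fence in place of a GPU copy
        }
    }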


@@ -24,6 +24,7 @@
 #include "shared/source/memory_manager/memory_operations_handler.h"
 #include "shared/source/memory_manager/migration_sync_data.h"
 #include "shared/source/os_interface/os_interface.h"
+#include "shared/source/utilities/cpuintrinsics.h"
 
 #include "opencl/source/cl_device/cl_device.h"
 #include "opencl/source/command_queue/command_queue.h"
@@ -212,8 +213,7 @@ bool inline copyHostPointer(Buffer *buffer,
                            size <= Buffer::maxBufferSizeForCopyOnCpu &&
                            isCompressionEnabled == false &&
                            productHelper.getLocalMemoryAccessMode(hwInfo) != LocalMemoryAccessMode::cpuAccessDisallowed &&
-                           isLockable &&
-                           !isGpuCopyRequiredForDcFlushMitigation;
+                           isLockable;
 
     if (debugManager.flags.CopyHostPtrOnCpu.get() != -1) {
         copyOnCpuAllowed = debugManager.flags.CopyHostPtrOnCpu.get() == 1;
@@ -222,6 +222,11 @@ bool inline copyHostPointer(Buffer *buffer,
             memory->setAubWritable(true, GraphicsAllocation::defaultBank);
             memory->setTbxWritable(true, GraphicsAllocation::defaultBank);
             memcpy_s(ptrOffset(lockedPointer, buffer->getOffset()), size, hostPtr, size);
+
+            if (isGpuCopyRequiredForDcFlushMitigation) {
+                CpuIntrinsics::sfence();
+            }
+
             return true;
         } else {
             auto blitMemoryToAllocationResult = BlitOperationResult::unsupported;
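
The CpuIntrinsics::sfence() call comes from the newly included shared/source/utilities/cpuintrinsics.h. On x86 builds it is expected to reduce to the SFENCE instruction; an assumed shape of such a wrapper, for illustration only (not the verbatim NEO source):

    #include <xmmintrin.h>

    namespace CpuIntrinsics {
    inline void sfence() {
        _mm_sfence(); // x86 store fence: prior stores become globally visible first
    }
    } // namespace CpuIntrinsics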


@@ -20,7 +20,7 @@ bool CommandQueue::isTimestampWaitEnabled() {
 }
 
 bool checkIsGpuCopyRequiredForDcFlushMitigation(AllocationType type) {
-    return false;
+    return ultHwConfig.useGpuCopyForDcFlushMitigation;
 }
 } // namespace NEO


@@ -599,26 +599,22 @@ TEST(Buffer, givenClMemCopyHostPointerPassedToBufferCreateWhenAllocationIsNotInS
     }
 }
 
-TEST(Buffer, givenDcFlushMitigationWhenCreateBufferCopyHostptrThenUseBlitterCopy) {
-    DebugManagerStateRestore restorer;
-    debugManager.flags.AllowDcFlush.set(0);
-    ExecutionEnvironment *executionEnvironment = MockClDevice::prepareExecutionEnvironment(defaultHwInfo.get(), 0u);
-    executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
+namespace CpuIntrinsicsTests {
+extern std::atomic<uint32_t> sfenceCounter;
+} // namespace CpuIntrinsicsTests
+
+TEST(Buffer, givenDcFlushMitigationWhenCreateBufferCopyHostptrThenUseLockPointerCopyWithSfence) {
+    ExecutionEnvironment *executionEnvironment = MockClDevice::prepareExecutionEnvironment(defaultHwInfo.get(), 0u);
 
     auto productHelper = executionEnvironment->rootDeviceEnvironments[0]->productHelper.get();
-    if (!(productHelper->isBlitterFullySupported(*defaultHwInfo) && productHelper->isDcFlushMitigated())) {
+    if (!productHelper->isDcFlushMitigated()) {
         GTEST_SKIP();
     }
 
-    auto blitterCalled = 0u;
-    auto mockBlitMemoryToAllocation = [&](const NEO::Device &device, NEO::GraphicsAllocation *memory, size_t offset, const void *hostPtr,
-                                          Vec3<size_t> size) -> NEO::BlitOperationResult {
-        memcpy(memory->getUnderlyingBuffer(), hostPtr, size.x);
-        blitterCalled++;
-        return BlitOperationResult::success;
-    };
-    VariableBackup<NEO::BlitHelperFunctions::BlitMemoryToAllocationFunc> blitMemoryToAllocationFuncBackup(
-        &NEO::BlitHelperFunctions::blitMemoryToAllocation, mockBlitMemoryToAllocation);
+    VariableBackup<UltHwConfig> backup(&ultHwConfig);
+    ultHwConfig.useGpuCopyForDcFlushMitigation = true;
+    DebugManagerStateRestore restorer;
+    debugManager.flags.AllowDcFlush.set(0);
 
     auto *memoryManager = new MockMemoryManagerFailFirstAllocation(*executionEnvironment);
     executionEnvironment->memoryManager.reset(memoryManager);
@@ -626,6 +622,7 @@ TEST(Buffer, givenDcFlushMitigationWhenCreateBufferCopyHostptrThenUseBlitterCopy
     auto device = std::make_unique<MockClDevice>(MockDevice::create<MockDevice>(executionEnvironment, 0));
     MockContext ctx(device.get());
+    CpuIntrinsicsTests::sfenceCounter.store(0u);
 
     cl_int retVal = 0;
     cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR;
@@ -634,7 +631,8 @@ TEST(Buffer, givenDcFlushMitigationWhenCreateBufferCopyHostptrThenUseBlitterCopy
     std::unique_ptr<Buffer> buffer(Buffer::create(&ctx, flags, sizeof(memory), memory, retVal));
     ASSERT_NE(nullptr, buffer.get());
 
-    EXPECT_EQ(blitterCalled, 1u);
+    EXPECT_EQ(1u, CpuIntrinsicsTests::sfenceCounter.load());
+    CpuIntrinsicsTests::sfenceCounter.store(0u);
 }
 
 TEST(Buffer, givenPropertiesWithClDeviceHandleListKHRWhenCreateBufferThenCorrectBufferIsSet) {
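
The test relies on the unit-test build linking a counting stand-in for the CPU intrinsics, which is what exposes the sfenceCounter it resets and checks. A rough sketch of what such a mock presumably looks like (illustrative; the real one lives in the shared test helpers):

    #include <atomic>
    #include <cstdint>

    namespace CpuIntrinsicsTests {
    std::atomic<std::uint32_t> sfenceCounter{0u}; // bumped by the mock instead of executing SFENCE
    } // namespace CpuIntrinsicsTests

    namespace CpuIntrinsics {
    void sfence() {
        CpuIntrinsicsTests::sfenceCounter++; // record the call so tests can assert on it
    }
    } // namespace CpuIntrinsics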


@@ -50,7 +50,7 @@ void BaseUltConfigListener::OnTestEnd(const ::testing::TestInfo &) {
     // Ensure that global state is restored
     UltHwConfig expectedState{};
-    static_assert(sizeof(UltHwConfig) == (16 * sizeof(bool) + sizeof(const char *)), ""); // Ensure that there is no internal padding
+    static_assert(sizeof(UltHwConfig) == (17 * sizeof(bool) + sizeof(const char *)) + sizeof(UltHwConfig::padding), ""); // Ensure that there is no internal padding
     EXPECT_EQ(0, memcmp(&expectedState, &ultHwConfig, sizeof(UltHwConfig)));
     EXPECT_EQ(0, memcmp(&referencedHwInfo.platform, &defaultHwInfo->platform, sizeof(PLATFORM)));


@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -16,6 +16,7 @@ struct UltHwConfig {
     bool useWaitForTimestamps = false;
     bool useBlitSplit = false;
     bool useFirstSubmissionInitDevice = false;
+    bool useGpuCopyForDcFlushMitigation = false;
     bool csrFailInitDirectSubmission = false;
     bool csrBaseCallDirectSubmissionAvailable = false;
@@ -28,6 +29,8 @@ struct UltHwConfig {
     bool csrCreatePreemptionReturnValue = true;
     bool reserved = false;
+
+    char padding[7];
     const char *aubTestName = nullptr;
 };
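
Why the explicit padding[7] and the updated static_assert: the struct now carries 17 one-byte bools, and 17 + 7 = 24 bytes places the trailing const char * on an 8-byte boundary on a typical LP64 target, so the compiler inserts no hidden padding and the memcmp in BaseUltConfigListener compares only deliberately initialized bytes. A small layout model under that LP64 assumption (not the real UltHwConfig definition):

    #include <cstddef>

    struct LayoutModel {
        bool flags[17];  // stands in for the 17 bool members, 1 byte each
        char padding[7]; // explicit filler: 17 + 7 = 24, a multiple of the pointer alignment
        const char *aubTestName;
    };

    static_assert(offsetof(LayoutModel, aubTestName) == 24, "pointer starts right after the explicit padding");
    static_assert(sizeof(LayoutModel) == 32, "no compiler-inserted padding (assuming 8-byte pointers)");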