performance: Use lock pointer copy with sfence for dc flush mitigation
Resolves: NEO-12898
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
parent
2d6fb1af4b
commit
8c3c703ec0
|
@ -24,6 +24,7 @@
|
||||||
#include "shared/source/memory_manager/memory_operations_handler.h"
|
#include "shared/source/memory_manager/memory_operations_handler.h"
|
||||||
#include "shared/source/memory_manager/migration_sync_data.h"
|
#include "shared/source/memory_manager/migration_sync_data.h"
|
||||||
#include "shared/source/os_interface/os_interface.h"
|
#include "shared/source/os_interface/os_interface.h"
|
||||||
|
#include "shared/source/utilities/cpuintrinsics.h"
|
||||||
|
|
||||||
#include "opencl/source/cl_device/cl_device.h"
|
#include "opencl/source/cl_device/cl_device.h"
|
||||||
#include "opencl/source/command_queue/command_queue.h"
|
#include "opencl/source/command_queue/command_queue.h"
|
||||||
|
@ -212,8 +213,7 @@ bool inline copyHostPointer(Buffer *buffer,
|
||||||
size <= Buffer::maxBufferSizeForCopyOnCpu &&
|
size <= Buffer::maxBufferSizeForCopyOnCpu &&
|
||||||
isCompressionEnabled == false &&
|
isCompressionEnabled == false &&
|
||||||
productHelper.getLocalMemoryAccessMode(hwInfo) != LocalMemoryAccessMode::cpuAccessDisallowed &&
|
productHelper.getLocalMemoryAccessMode(hwInfo) != LocalMemoryAccessMode::cpuAccessDisallowed &&
|
||||||
isLockable &&
|
isLockable;
|
||||||
!isGpuCopyRequiredForDcFlushMitigation;
|
|
||||||
|
|
||||||
if (debugManager.flags.CopyHostPtrOnCpu.get() != -1) {
|
if (debugManager.flags.CopyHostPtrOnCpu.get() != -1) {
|
||||||
copyOnCpuAllowed = debugManager.flags.CopyHostPtrOnCpu.get() == 1;
|
copyOnCpuAllowed = debugManager.flags.CopyHostPtrOnCpu.get() == 1;
|
||||||
|
@ -222,6 +222,11 @@ bool inline copyHostPointer(Buffer *buffer,
|
||||||
memory->setAubWritable(true, GraphicsAllocation::defaultBank);
|
memory->setAubWritable(true, GraphicsAllocation::defaultBank);
|
||||||
memory->setTbxWritable(true, GraphicsAllocation::defaultBank);
|
memory->setTbxWritable(true, GraphicsAllocation::defaultBank);
|
||||||
memcpy_s(ptrOffset(lockedPointer, buffer->getOffset()), size, hostPtr, size);
|
memcpy_s(ptrOffset(lockedPointer, buffer->getOffset()), size, hostPtr, size);
|
||||||
|
|
||||||
|
if (isGpuCopyRequiredForDcFlushMitigation) {
|
||||||
|
CpuIntrinsics::sfence();
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
auto blitMemoryToAllocationResult = BlitOperationResult::unsupported;
|
auto blitMemoryToAllocationResult = BlitOperationResult::unsupported;
|
||||||
|
|
|
@ -20,7 +20,7 @@ bool CommandQueue::isTimestampWaitEnabled() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool checkIsGpuCopyRequiredForDcFlushMitigation(AllocationType type) {
|
bool checkIsGpuCopyRequiredForDcFlushMitigation(AllocationType type) {
|
||||||
return false;
|
return ultHwConfig.useGpuCopyForDcFlushMitigation;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
|
@ -599,26 +599,22 @@ TEST(Buffer, givenClMemCopyHostPointerPassedToBufferCreateWhenAllocationIsNotInS
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Buffer, givenDcFlushMitigationWhenCreateBufferCopyHostptrThenUseBlitterCopy) {
|
namespace CpuIntrinsicsTests {
|
||||||
DebugManagerStateRestore restorer;
|
extern std::atomic<uint32_t> sfenceCounter;
|
||||||
debugManager.flags.AllowDcFlush.set(0);
|
} // namespace CpuIntrinsicsTests
|
||||||
ExecutionEnvironment *executionEnvironment = MockClDevice::prepareExecutionEnvironment(defaultHwInfo.get(), 0u);
|
|
||||||
executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
|
|
||||||
|
|
||||||
|
TEST(Buffer, givenDcFlushMitigationWhenCreateBufferCopyHostptrThenUseLockPointerCopyWithSfence) {
|
||||||
|
ExecutionEnvironment *executionEnvironment = MockClDevice::prepareExecutionEnvironment(defaultHwInfo.get(), 0u);
|
||||||
auto productHelper = executionEnvironment->rootDeviceEnvironments[0]->productHelper.get();
|
auto productHelper = executionEnvironment->rootDeviceEnvironments[0]->productHelper.get();
|
||||||
if (!(productHelper->isBlitterFullySupported(*defaultHwInfo) && productHelper->isDcFlushMitigated())) {
|
if (!productHelper->isDcFlushMitigated()) {
|
||||||
GTEST_SKIP();
|
GTEST_SKIP();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto blitterCalled = 0u;
|
VariableBackup<UltHwConfig> backup(&ultHwConfig);
|
||||||
auto mockBlitMemoryToAllocation = [&](const NEO::Device &device, NEO::GraphicsAllocation *memory, size_t offset, const void *hostPtr,
|
ultHwConfig.useGpuCopyForDcFlushMitigation = true;
|
||||||
Vec3<size_t> size) -> NEO::BlitOperationResult {
|
|
||||||
memcpy(memory->getUnderlyingBuffer(), hostPtr, size.x);
|
DebugManagerStateRestore restorer;
|
||||||
blitterCalled++;
|
debugManager.flags.AllowDcFlush.set(0);
|
||||||
return BlitOperationResult::success;
|
|
||||||
};
|
|
||||||
VariableBackup<NEO::BlitHelperFunctions::BlitMemoryToAllocationFunc> blitMemoryToAllocationFuncBackup(
|
|
||||||
&NEO::BlitHelperFunctions::blitMemoryToAllocation, mockBlitMemoryToAllocation);
|
|
||||||
|
|
||||||
auto *memoryManager = new MockMemoryManagerFailFirstAllocation(*executionEnvironment);
|
auto *memoryManager = new MockMemoryManagerFailFirstAllocation(*executionEnvironment);
|
||||||
executionEnvironment->memoryManager.reset(memoryManager);
|
executionEnvironment->memoryManager.reset(memoryManager);
|
||||||
|
@ -626,6 +622,7 @@ TEST(Buffer, givenDcFlushMitigationWhenCreateBufferCopyHostptrThenUseBlitterCopy
|
||||||
auto device = std::make_unique<MockClDevice>(MockDevice::create<MockDevice>(executionEnvironment, 0));
|
auto device = std::make_unique<MockClDevice>(MockDevice::create<MockDevice>(executionEnvironment, 0));
|
||||||
|
|
||||||
MockContext ctx(device.get());
|
MockContext ctx(device.get());
|
||||||
|
CpuIntrinsicsTests::sfenceCounter.store(0u);
|
||||||
|
|
||||||
cl_int retVal = 0;
|
cl_int retVal = 0;
|
||||||
cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR;
|
cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR;
|
||||||
|
@ -634,7 +631,8 @@ TEST(Buffer, givenDcFlushMitigationWhenCreateBufferCopyHostptrThenUseBlitterCopy
|
||||||
std::unique_ptr<Buffer> buffer(Buffer::create(&ctx, flags, sizeof(memory), memory, retVal));
|
std::unique_ptr<Buffer> buffer(Buffer::create(&ctx, flags, sizeof(memory), memory, retVal));
|
||||||
|
|
||||||
ASSERT_NE(nullptr, buffer.get());
|
ASSERT_NE(nullptr, buffer.get());
|
||||||
EXPECT_EQ(blitterCalled, 1u);
|
EXPECT_EQ(1u, CpuIntrinsicsTests::sfenceCounter.load());
|
||||||
|
CpuIntrinsicsTests::sfenceCounter.store(0u);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Buffer, givenPropertiesWithClDeviceHandleListKHRWhenCreateBufferThenCorrectBufferIsSet) {
|
TEST(Buffer, givenPropertiesWithClDeviceHandleListKHRWhenCreateBufferThenCorrectBufferIsSet) {
|
||||||
|
|
|
@ -50,7 +50,7 @@ void BaseUltConfigListener::OnTestEnd(const ::testing::TestInfo &) {
|
||||||
|
|
||||||
// Ensure that global state is restored
|
// Ensure that global state is restored
|
||||||
UltHwConfig expectedState{};
|
UltHwConfig expectedState{};
|
||||||
static_assert(sizeof(UltHwConfig) == (16 * sizeof(bool) + sizeof(const char *)), ""); // Ensure that there is no internal padding
|
static_assert(sizeof(UltHwConfig) == (17 * sizeof(bool) + sizeof(const char *)) + sizeof(UltHwConfig::padding), ""); // Ensure that there is no internal padding
|
||||||
EXPECT_EQ(0, memcmp(&expectedState, &ultHwConfig, sizeof(UltHwConfig)));
|
EXPECT_EQ(0, memcmp(&expectedState, &ultHwConfig, sizeof(UltHwConfig)));
|
||||||
|
|
||||||
EXPECT_EQ(0, memcmp(&referencedHwInfo.platform, &defaultHwInfo->platform, sizeof(PLATFORM)));
|
EXPECT_EQ(0, memcmp(&referencedHwInfo.platform, &defaultHwInfo->platform, sizeof(PLATFORM)));
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2020-2023 Intel Corporation
|
* Copyright (C) 2020-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -16,6 +16,7 @@ struct UltHwConfig {
|
||||||
bool useWaitForTimestamps = false;
|
bool useWaitForTimestamps = false;
|
||||||
bool useBlitSplit = false;
|
bool useBlitSplit = false;
|
||||||
bool useFirstSubmissionInitDevice = false;
|
bool useFirstSubmissionInitDevice = false;
|
||||||
|
bool useGpuCopyForDcFlushMitigation = false;
|
||||||
|
|
||||||
bool csrFailInitDirectSubmission = false;
|
bool csrFailInitDirectSubmission = false;
|
||||||
bool csrBaseCallDirectSubmissionAvailable = false;
|
bool csrBaseCallDirectSubmissionAvailable = false;
|
||||||
|
@ -28,6 +29,8 @@ struct UltHwConfig {
|
||||||
bool csrCreatePreemptionReturnValue = true;
|
bool csrCreatePreemptionReturnValue = true;
|
||||||
bool reserved = false;
|
bool reserved = false;
|
||||||
|
|
||||||
|
char padding[7];
|
||||||
|
|
||||||
const char *aubTestName = nullptr;
|
const char *aubTestName = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue