mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
performance: Use lock pointer copy for dc flush mitigation
Resolves: NEO-12898 Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
6f4994c269
commit
b8be102455
@@ -658,6 +658,13 @@ Buffer *Context::BufferPoolAllocator::allocateBufferFromPool(const MemoryPropert
|
||||
|
||||
bufferFromPool = this->allocateFromPools(memoryProperties, flags, flagsIntel, requestedSize, hostPtr, errcodeRet);
|
||||
if (bufferFromPool != nullptr) {
|
||||
for (const auto rootDeviceIndex : this->context->getRootDeviceIndices()) {
|
||||
auto &csr = this->context->getSpecialQueue(rootDeviceIndex)->getGpgpuCommandStreamReceiver();
|
||||
auto lock = csr.obtainUniqueOwnership();
|
||||
csr.registerDcFlushForDcMitigation();
|
||||
csr.flushTagUpdate();
|
||||
}
|
||||
|
||||
return bufferFromPool;
|
||||
}
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "shared/source/memory_manager/memory_operations_handler.h"
|
||||
#include "shared/source/memory_manager/migration_sync_data.h"
|
||||
#include "shared/source/os_interface/os_interface.h"
|
||||
#include "shared/source/utilities/cpuintrinsics.h"
|
||||
|
||||
#include "opencl/source/cl_device/cl_device.h"
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
@@ -212,8 +213,7 @@ bool inline copyHostPointer(Buffer *buffer,
|
||||
size <= Buffer::maxBufferSizeForCopyOnCpu &&
|
||||
isCompressionEnabled == false &&
|
||||
productHelper.getLocalMemoryAccessMode(hwInfo) != LocalMemoryAccessMode::cpuAccessDisallowed &&
|
||||
isLockable &&
|
||||
!isGpuCopyRequiredForDcFlushMitigation;
|
||||
isLockable;
|
||||
|
||||
if (debugManager.flags.CopyHostPtrOnCpu.get() != -1) {
|
||||
copyOnCpuAllowed = debugManager.flags.CopyHostPtrOnCpu.get() == 1;
|
||||
@@ -222,6 +222,9 @@ bool inline copyHostPointer(Buffer *buffer,
|
||||
memory->setAubWritable(true, GraphicsAllocation::defaultBank);
|
||||
memory->setTbxWritable(true, GraphicsAllocation::defaultBank);
|
||||
memcpy_s(ptrOffset(lockedPointer, buffer->getOffset()), size, hostPtr, size);
|
||||
if (isGpuCopyRequiredForDcFlushMitigation) {
|
||||
CpuIntrinsics::sfence();
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
auto blitMemoryToAllocationResult = BlitOperationResult::unsupported;
|
||||
|
||||
@@ -20,7 +20,7 @@ bool CommandQueue::isTimestampWaitEnabled() {
|
||||
}
|
||||
|
||||
// Reports whether the DC-flush-mitigation copy path should be taken.
// The result is driven solely by ultHwConfig.useGpuCopyForDcFlushMitigation, so a
// test can switch it on or off; `type` is accepted for interface compatibility
// and is not consulted here.
// NOTE(review): the previous unconditional `return false;` made the toggle below
// unreachable; it has been dropped so the configured value is actually honoured.
bool checkIsGpuCopyRequiredForDcFlushMitigation(AllocationType type) {
    return ultHwConfig.useGpuCopyForDcFlushMitigation;
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -599,26 +599,31 @@ TEST(Buffer, givenClMemCopyHostPointerPassedToBufferCreateWhenAllocationIsNotInS
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Buffer, givenDcFlushMitigationWhenCreateBufferCopyHostptrThenUseBlitterCopy) {
|
||||
DebugManagerStateRestore restorer;
|
||||
debugManager.flags.AllowDcFlush.set(0);
|
||||
namespace CpuIntrinsicsTests {
|
||||
extern std::atomic<uint32_t> sfenceCounter;
|
||||
} // namespace CpuIntrinsicsTests
|
||||
|
||||
TEST(Buffer, givenDcFlushMitigationWhenCreateBufferCopyHostptrThenUseMemcpy) {
|
||||
ExecutionEnvironment *executionEnvironment = MockClDevice::prepareExecutionEnvironment(defaultHwInfo.get(), 0u);
|
||||
executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
|
||||
|
||||
auto productHelper = executionEnvironment->rootDeviceEnvironments[0]->productHelper.get();
|
||||
if (!(productHelper->isBlitterFullySupported(*defaultHwInfo) && productHelper->isDcFlushMitigated())) {
|
||||
if (!productHelper->isDcFlushMitigated()) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
auto blitterCalled = 0u;
|
||||
auto mockBlitMemoryToAllocation = [&](const NEO::Device &device, NEO::GraphicsAllocation *memory, size_t offset, const void *hostPtr,
|
||||
Vec3<size_t> size) -> NEO::BlitOperationResult {
|
||||
memcpy(memory->getUnderlyingBuffer(), hostPtr, size.x);
|
||||
blitterCalled++;
|
||||
return BlitOperationResult::success;
|
||||
};
|
||||
VariableBackup<NEO::BlitHelperFunctions::BlitMemoryToAllocationFunc> blitMemoryToAllocationFuncBackup(
|
||||
&NEO::BlitHelperFunctions::blitMemoryToAllocation, mockBlitMemoryToAllocation);
|
||||
|
||||
VariableBackup<NEO::BlitHelperFunctions::BlitMemoryToAllocationFunc> blitMemoryToAllocationFuncBackup(&NEO::BlitHelperFunctions::blitMemoryToAllocation, mockBlitMemoryToAllocation);
|
||||
VariableBackup<UltHwConfig> backup(&ultHwConfig);
|
||||
ultHwConfig.useGpuCopyForDcFlushMitigation = true;
|
||||
|
||||
DebugManagerStateRestore restorer;
|
||||
debugManager.flags.AllowDcFlush.set(0);
|
||||
|
||||
auto *memoryManager = new MockMemoryManagerFailFirstAllocation(*executionEnvironment);
|
||||
executionEnvironment->memoryManager.reset(memoryManager);
|
||||
@@ -626,6 +631,7 @@ TEST(Buffer, givenDcFlushMitigationWhenCreateBufferCopyHostptrThenUseBlitterCopy
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::create<MockDevice>(executionEnvironment, 0));
|
||||
|
||||
MockContext ctx(device.get());
|
||||
CpuIntrinsicsTests::sfenceCounter.store(0u);
|
||||
|
||||
cl_int retVal = 0;
|
||||
cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR;
|
||||
@@ -634,7 +640,10 @@ TEST(Buffer, givenDcFlushMitigationWhenCreateBufferCopyHostptrThenUseBlitterCopy
|
||||
std::unique_ptr<Buffer> buffer(Buffer::create(&ctx, flags, sizeof(memory), memory, retVal));
|
||||
|
||||
ASSERT_NE(nullptr, buffer.get());
|
||||
EXPECT_EQ(blitterCalled, 1u);
|
||||
EXPECT_EQ(blitterCalled, 0u);
|
||||
EXPECT_EQ(ctx.getSpecialQueue(0)->taskCount, 0u);
|
||||
EXPECT_EQ(1u, CpuIntrinsicsTests::sfenceCounter.load());
|
||||
CpuIntrinsicsTests::sfenceCounter.store(0u);
|
||||
}
|
||||
|
||||
TEST(Buffer, givenPropertiesWithClDeviceHandleListKHRWhenCreateBufferThenCorrectBufferIsSet) {
|
||||
|
||||
@@ -8,6 +8,8 @@
|
||||
#pragma once
|
||||
namespace NEO {
|
||||
struct UltHwConfig {
|
||||
const char *aubTestName = nullptr;
|
||||
|
||||
bool mockedPrepareDeviceEnvironmentsFuncResult = true;
|
||||
bool useHwCsr = false;
|
||||
bool useMockedPrepareDeviceEnvironmentsFunc = true;
|
||||
@@ -16,6 +18,7 @@ struct UltHwConfig {
|
||||
bool useWaitForTimestamps = false;
|
||||
bool useBlitSplit = false;
|
||||
bool useFirstSubmissionInitDevice = false;
|
||||
bool useGpuCopyForDcFlushMitigation = false;
|
||||
|
||||
bool csrFailInitDirectSubmission = false;
|
||||
bool csrBaseCallDirectSubmissionAvailable = false;
|
||||
@@ -26,9 +29,6 @@ struct UltHwConfig {
|
||||
|
||||
bool csrBaseCallCreatePreemption = true;
|
||||
bool csrCreatePreemptionReturnValue = true;
|
||||
bool reserved = false;
|
||||
|
||||
const char *aubTestName = nullptr;
|
||||
};
|
||||
|
||||
extern UltHwConfig ultHwConfig;
|
||||
|
||||
Reference in New Issue
Block a user