mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Add debug flag that controls zero copy for CL_MEM_USE_HOST_PTR buffers.
- When the flag is enabled, the driver will not take the zero-copy path for buffers created with the CL_MEM_USE_HOST_PTR flag. - The flag has no effect in a shared context, where we must accept zero-copy storage. Change-Id: Idda94f296dd12e7e3ccb15f2224808287551ac97
This commit is contained in:

committed by
sys_ocldev

parent
d2817427af
commit
4cb86b4045
@ -210,7 +210,8 @@ void Buffer::checkMemory(cl_mem_flags flags,
|
||||
if (hostPtr) {
|
||||
if (alignUp(hostPtr, MemoryConstants::cacheLineSize) != hostPtr ||
|
||||
alignUp(size, MemoryConstants::cacheLineSize) != size ||
|
||||
minAddress > reinterpret_cast<uintptr_t>(hostPtr)) {
|
||||
minAddress > reinterpret_cast<uintptr_t>(hostPtr) ||
|
||||
DebugManager.flags.DisableZeroCopyForUseHostPtr.get()) {
|
||||
allocateMemory = true;
|
||||
isZeroCopy = false;
|
||||
copyMemoryFromHostPtr = true;
|
||||
|
@ -19,15 +19,23 @@
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
/*SIMULATION FLAGS*/
|
||||
DECLARE_DEBUG_VARIABLE(std::string, TbxServer, std::string("127.0.0.1"), "TCP-IP address of TBX server")
|
||||
DECLARE_DEBUG_VARIABLE(std::string, ProductFamilyOverride, std::string("unk"), "Specify product for use in AUB/TBX")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, SetCommandStreamReceiver, 0, "Set command stream receiver")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, TbxPort, 4321, "TCP-IP port of TBX server")
|
||||
DECLARE_DEBUG_VARIABLE(bool, FlattenBatchBufferForAUBDump, false, "Dump multi-level batch buffers to AUB as single, flat batch buffer")
|
||||
DECLARE_DEBUG_VARIABLE(bool, AddPatchInfoCommentsForAUBDump, false, "Dump comments containing allocations and patching information")
|
||||
|
||||
/*DEBUG FLAGS*/
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, InitializeMemoryInDebug, 0x10, "Memory initialization in debug")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, SchedulerSimulationReturnInstance, 0, "prints execution model related debug information")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableDebugBreak, true, "Enable DEBUG_BREAKs")
|
||||
DECLARE_DEBUG_VARIABLE(bool, FlushAllCaches, false, "pipe controls between enqueues flush all possible caches")
|
||||
DECLARE_DEBUG_VARIABLE(bool, MakeEachEnqueueBlocking, false, "equivalent of finish after each enqueue")
|
||||
DECLARE_DEBUG_VARIABLE(bool, DoCpuCopyOnReadBuffer, false, "triggers CPU copy path for Read Buffer calls, only supported for some basic use cases ( no events, not blocked calls )")
|
||||
DECLARE_DEBUG_VARIABLE(bool, DoCpuCopyOnWriteBuffer, false, "triggers CPU copy path for Write Buffer calls, only supported for some basic use cases ( no events, not blocked calls )")
|
||||
DECLARE_DEBUG_VARIABLE(bool, DisableResourceRecycling, false, "when set to true disables resource recycling optimization")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, InitializeMemoryInDebug, 0x10, "Memory initialization in debug")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, SchedulerSimulationReturnInstance, 0, "prints execution model related debug information")
|
||||
DECLARE_DEBUG_VARIABLE(bool, ForceDispatchScheduler, false, "dispatches scheduler kernel instead of kernel enqueued")
|
||||
DECLARE_DEBUG_VARIABLE(bool, TrackParentEvents, false, "events track their parents")
|
||||
/*LOGGING FLAGS*/
|
||||
@ -54,13 +62,7 @@ DECLARE_DEBUG_VARIABLE(bool, DisableStatelessToStatefulOptimization, false, "Dis
|
||||
DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, 0, "disables concurrent block kernel execution")
|
||||
DECLARE_DEBUG_VARIABLE(bool, UseNewHeapAllocator, true, "Custom 4GB heap allocator is used")
|
||||
DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.")
|
||||
/*SIMULATION FLAGS*/
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, SetCommandStreamReceiver, 0, "Set command stream receiver")
|
||||
DECLARE_DEBUG_VARIABLE(std::string, TbxServer, std::string("127.0.0.1"), "TCP-IP address of TBX server")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, TbxPort, 4321, "TCP-IP port of TBX server")
|
||||
DECLARE_DEBUG_VARIABLE(std::string, ProductFamilyOverride, std::string("unk"), "Specify product for use in AUB/TBX")
|
||||
DECLARE_DEBUG_VARIABLE(bool, FlattenBatchBufferForAUBDump, false, "Dump multi-level batch buffers to AUB as single, flat batch buffer")
|
||||
DECLARE_DEBUG_VARIABLE(bool, AddPatchInfoCommentsForAUBDump, false, "Dump comments containing allocations and patching information")
|
||||
DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.")
|
||||
/*FEATURE FLAGS*/
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnablePackedYuv, true, "Enables cl_packed_yuv extension")
|
||||
@ -71,22 +73,22 @@ DECLARE_DEBUG_VARIABLE(bool, EnableDeferredDeleter, true, "Enables async deleter
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableAsyncDestroyAllocations, true, "Enables async destroying graphics allocations in mem obj destructor")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableAsyncEventsHandler, true, "Enables async events handler")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableForcePin, true, "Enables early pinning for memory object")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, Enable64kbpages, -1, "-1: default behaviour, 0 Disables, 1 Enables support for 64KB pages for driver allocated fine grain svm buffers")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeND, true, "Enables diffrent algorithm to compute local work size")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeSquared, false, "Enables algorithm to compute the most squared work group as possible")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableVaLibCalls, true, "Enable cl-va sharing lib calls")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, Enable64kbpages, -1, "-1: default behaviour, 0 Disables, 1 Enables support for 64KB pages for driver allocated fine grain svm buffers")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableKmdNotify, -1, "-1: dont override, 0: disable, 1: enable")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideKmdNotifyDelayMicroseconds, -1, "-1: dont override, 0: infinite timeout, >0: timeout in microseconds")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableQuickKmdSleep, -1, "-1: dont override, 0: disable, 1: enable. It works only when Kmd Notify is enabled.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideQuickKmdSleepDelayMicroseconds, -1, "-1: dont override, 0: infinite timeout, >0: timeout in microseconds")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableQuickKmdSleepForSporadicWaits, -1, "-1: dont override, 0: disable, 1: enable. It works only when QuickKmdSleep is enabled.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideDelayQuickKmdSleepForSporadicWaitsMicroseconds, -1, "-1: dont override, >0: timeout in microseconds")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableVaLibCalls, true, "Enable cl-va sharing lib calls")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, CsrDispatchMode, 0, "Chooses DispatchMode for Csr")
|
||||
/*DRIVER TOGGLES*/
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForcePreemptionMode, -1, "Keep this variable in sync with PreemptionMode enum. -1 - devices default mode, 1 - disable, 2 - midBatch, 3 - threadGroup, 4 - midThread")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceWddmInterfaceVersion, 0, "Windows only. Force internal interface version. 0 is default value. Example: set 20 to force 2.0")
|
||||
DECLARE_DEBUG_VARIABLE(bool, HwQueueSupported, false, "Windows only. Pass flag to KMD during Wddm Context creation")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, NodeOrdinal, -1, "-1: default do not override, 0: ENGINE_RCS")
|
||||
DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideThreadArbitrationPolicy, -1, "-1 (dont override) or any valid config (0: Age Based, 1: Round Robin)")
|
||||
DECLARE_DEBUG_VARIABLE(bool, HwQueueSupported, false, "Windows only. Pass flag to KMD during Wddm Context creation")
|
||||
DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger")
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Intel Corporation
|
||||
* Copyright (c) 2017 - 2018, Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@ -107,7 +107,6 @@ TEST_P(ZeroCopyBufferTest, CheckCacheAlignedPointerResultsInZeroCopy) {
|
||||
//check if buffer always have properly aligned storage ( PAGE )
|
||||
EXPECT_EQ(alignUp(buffer->getCpuAddress(), MemoryConstants::cacheLineSize), buffer->getCpuAddress());
|
||||
|
||||
|
||||
delete buffer;
|
||||
}
|
||||
|
||||
@ -119,52 +118,63 @@ INSTANTIATE_TEST_CASE_P(
|
||||
TEST(ZeroCopyBufferTestWithSharedContext, GivenContextThatIsSharedWhenAskedForBufferCreationThenAlwaysResultsInZeroCopy) {
|
||||
|
||||
MockContext context;
|
||||
auto host_ptr = (void*)0x1001;
|
||||
auto host_ptr = reinterpret_cast<void *>(0x1001);
|
||||
auto size = 64;
|
||||
auto retVal = CL_SUCCESS;
|
||||
|
||||
context.isSharedContext = true;
|
||||
auto buffer = Buffer::create(
|
||||
&context,
|
||||
CL_MEM_USE_HOST_PTR,
|
||||
size,
|
||||
host_ptr,
|
||||
retVal);
|
||||
std::unique_ptr<Buffer> buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal));
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_TRUE(buffer->isMemObjZeroCopy()) << "Zero Copy not handled properly";
|
||||
|
||||
if (buffer->getGraphicsAllocation()->is32BitAllocation == false)
|
||||
{
|
||||
EXPECT_EQ(host_ptr, (void*)buffer->getGraphicsAllocation()->getUnderlyingBuffer());
|
||||
if (buffer->getGraphicsAllocation()->is32BitAllocation == false) {
|
||||
EXPECT_EQ(host_ptr, buffer->getGraphicsAllocation()->getUnderlyingBuffer());
|
||||
}
|
||||
}
|
||||
|
||||
delete buffer;
|
||||
TEST(ZeroCopyBufferTestWithSharedContext, GivenContextThatIsSharedAndDisableZeroCopyFlagWhenAskedForBufferCreationThenAlwaysResultsInZeroCopy) {
|
||||
DebugManagerStateRestore stateRestore;
|
||||
DebugManager.flags.DisableZeroCopyForUseHostPtr.set(true);
|
||||
|
||||
MockContext context;
|
||||
auto host_ptr = reinterpret_cast<void *>(0x1001);
|
||||
auto size = 64;
|
||||
auto retVal = CL_SUCCESS;
|
||||
|
||||
context.isSharedContext = true;
|
||||
std::unique_ptr<Buffer> buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal));
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_TRUE(buffer->isMemObjZeroCopy());
|
||||
}
|
||||
|
||||
TEST(ZeroCopyBufferWith32BitAddressing, GivenDeviceSupporting32BitAddressingWhenAskedForBufferCreationFromHostPtrThenNonZeroCopyBufferIsReturned)
|
||||
{
|
||||
TEST(ZeroCopyWithDebugFlag, GivenInputsThatWouldResultInZeroCopyAndDisableZeroCopyFlagWhenBufferIsCreatedThenNonZeroCopyBufferIsReturned) {
|
||||
DebugManagerStateRestore stateRestore;
|
||||
DebugManager.flags.DisableZeroCopyForUseHostPtr.set(true);
|
||||
MockContext context;
|
||||
auto host_ptr = reinterpret_cast<void *>(alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize));
|
||||
auto size = MemoryConstants::pageSize;
|
||||
auto retVal = CL_SUCCESS;
|
||||
|
||||
std::unique_ptr<Buffer> buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal));
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_FALSE(buffer->isMemObjZeroCopy());
|
||||
alignedFree(host_ptr);
|
||||
}
|
||||
|
||||
TEST(ZeroCopyBufferWith32BitAddressing, GivenDeviceSupporting32BitAddressingWhenAskedForBufferCreationFromHostPtrThenNonZeroCopyBufferIsReturned) {
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
{
|
||||
DebugManager.flags.Force32bitAddressing.set(true);
|
||||
MockContext context;
|
||||
auto host_ptr = (void *)alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize);
|
||||
auto size = MemoryConstants::pageSize;
|
||||
auto retVal = CL_SUCCESS;
|
||||
|
||||
auto buffer = Buffer::create(
|
||||
&context,
|
||||
CL_MEM_USE_HOST_PTR,
|
||||
size,
|
||||
host_ptr,
|
||||
retVal);
|
||||
std::unique_ptr<Buffer> buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal));
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
EXPECT_TRUE(buffer->isMemObjZeroCopy());
|
||||
if (is64bit) {
|
||||
EXPECT_TRUE(buffer->getGraphicsAllocation()->is32BitAllocation);
|
||||
}
|
||||
delete buffer;
|
||||
alignedFree(host_ptr);
|
||||
DebugManager.flags.Force32bitAddressing.set(false);
|
||||
}
|
||||
}
|
||||
|
@ -63,3 +63,4 @@ PrintDispatchParameters = false
|
||||
AddPatchInfoCommentsForAUBDump = false
|
||||
HwQueueSupported = false
|
||||
ForceWddmInterfaceVersion = 0
|
||||
DisableZeroCopyForUseHostPtr = false
|
Reference in New Issue
Block a user