Add the DisableZeroCopyForUseHostPtr debug flag.

When the flag is set, Buffer::checkMemory handles a non-null host pointer the
same way as a misaligned one: allocateMemory = true, isZeroCopy = false and
copyMemoryFromHostPtr = true. The flag is declared in DebugVariables.inl,
listed in igdrcl.config and covered by new unit tests. Unit tests release the
buffer before freeing the host memory it was created from.

diff --git a/runtime/mem_obj/buffer.cpp b/runtime/mem_obj/buffer.cpp
index 8bde9146aa..f69bbb3a8f 100644
--- a/runtime/mem_obj/buffer.cpp
+++ b/runtime/mem_obj/buffer.cpp
@@ -210,7 +210,8 @@ void Buffer::checkMemory(cl_mem_flags flags,
         if (hostPtr) {
             if (alignUp(hostPtr, MemoryConstants::cacheLineSize) != hostPtr ||
                 alignUp(size, MemoryConstants::cacheLineSize) != size ||
-                minAddress > reinterpret_cast<uintptr_t>(hostPtr)) {
+                minAddress > reinterpret_cast<uintptr_t>(hostPtr) ||
+                DebugManager.flags.DisableZeroCopyForUseHostPtr.get()) {
                 allocateMemory = true;
                 isZeroCopy = false;
                 copyMemoryFromHostPtr = true;
diff --git a/runtime/os_interface/DebugVariables.inl b/runtime/os_interface/DebugVariables.inl
index c16d6112dc..772e01cb1c 100644
--- a/runtime/os_interface/DebugVariables.inl
+++ b/runtime/os_interface/DebugVariables.inl
@@ -19,15 +19,23 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
+/*SIMULATION FLAGS*/
+DECLARE_DEBUG_VARIABLE(std::string, TbxServer, std::string("127.0.0.1"), "TCP-IP address of TBX server")
+DECLARE_DEBUG_VARIABLE(std::string, ProductFamilyOverride, std::string("unk"), "Specify product for use in AUB/TBX")
+DECLARE_DEBUG_VARIABLE(int32_t, SetCommandStreamReceiver, 0, "Set command stream receiver")
+DECLARE_DEBUG_VARIABLE(int32_t, TbxPort, 4321, "TCP-IP port of TBX server")
+DECLARE_DEBUG_VARIABLE(bool, FlattenBatchBufferForAUBDump, false, "Dump multi-level batch buffers to AUB as single, flat batch buffer")
+DECLARE_DEBUG_VARIABLE(bool, AddPatchInfoCommentsForAUBDump, false, "Dump comments containing allocations and patching information")
+
 /*DEBUG FLAGS*/
+DECLARE_DEBUG_VARIABLE(int32_t, InitializeMemoryInDebug, 0x10, "Memory initialization in debug")
+DECLARE_DEBUG_VARIABLE(int32_t, SchedulerSimulationReturnInstance, 0, "prints execution model related debug information")
 DECLARE_DEBUG_VARIABLE(bool, EnableDebugBreak, true, "Enable DEBUG_BREAKs")
 DECLARE_DEBUG_VARIABLE(bool, FlushAllCaches, false, "pipe controls between enqueues flush all possible caches")
 DECLARE_DEBUG_VARIABLE(bool, MakeEachEnqueueBlocking, false, "equivalent of finish after each enqueue")
 DECLARE_DEBUG_VARIABLE(bool, DoCpuCopyOnReadBuffer, false, "triggers CPU copy path for Read Buffer calls, only supported for some basic use cases ( no events, not blocked calls )")
 DECLARE_DEBUG_VARIABLE(bool, DoCpuCopyOnWriteBuffer, false, "triggers CPU copy path for Write Buffer calls, only supported for some basic use cases ( no events, not blocked calls )")
 DECLARE_DEBUG_VARIABLE(bool, DisableResourceRecycling, false, "when set to true disables resource recycling optimization")
-DECLARE_DEBUG_VARIABLE(int32_t, InitializeMemoryInDebug, 0x10, "Memory initialization in debug")
-DECLARE_DEBUG_VARIABLE(int32_t, SchedulerSimulationReturnInstance, 0, "prints execution model related debug information")
 DECLARE_DEBUG_VARIABLE(bool, ForceDispatchScheduler, false, "dispatches scheduler kernel instead of kernel enqueued")
 DECLARE_DEBUG_VARIABLE(bool, TrackParentEvents, false, "events track their parents")
 /*LOGGING FLAGS*/
@@ -54,13 +62,7 @@ DECLARE_DEBUG_VARIABLE(bool, DisableStatelessToStatefulOptimization, false, "Dis
 DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, 0, "disables concurrent block kernel execution")
 DECLARE_DEBUG_VARIABLE(bool, UseNewHeapAllocator, true, "Custom 4GB heap allocator is used")
 DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.")
-/*SIMULATION FLAGS*/
-DECLARE_DEBUG_VARIABLE(int32_t, SetCommandStreamReceiver, 0, "Set command stream receiver")
-DECLARE_DEBUG_VARIABLE(std::string, TbxServer, std::string("127.0.0.1"), "TCP-IP address of TBX server")
-DECLARE_DEBUG_VARIABLE(int32_t, TbxPort, 4321, "TCP-IP port of TBX server")
-DECLARE_DEBUG_VARIABLE(std::string, ProductFamilyOverride, std::string("unk"), "Specify product for use in AUB/TBX")
-DECLARE_DEBUG_VARIABLE(bool, FlattenBatchBufferForAUBDump, false, "Dump multi-level batch buffers to AUB as single, flat batch buffer")
-DECLARE_DEBUG_VARIABLE(bool, AddPatchInfoCommentsForAUBDump, false, "Dump comments containing allocations and patching information")
+DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.")
 /*FEATURE FLAGS*/
 DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension")
 DECLARE_DEBUG_VARIABLE(bool, EnablePackedYuv, true, "Enables cl_packed_yuv extension")
@@ -71,22 +73,22 @@ DECLARE_DEBUG_VARIABLE(bool, EnableDeferredDeleter, true, "Enables async deleter
 DECLARE_DEBUG_VARIABLE(bool, EnableAsyncDestroyAllocations, true, "Enables async destroying graphics allocations in mem obj destructor")
 DECLARE_DEBUG_VARIABLE(bool, EnableAsyncEventsHandler, true, "Enables async events handler")
 DECLARE_DEBUG_VARIABLE(bool, EnableForcePin, true, "Enables early pinning for memory object")
-DECLARE_DEBUG_VARIABLE(int32_t, Enable64kbpages, -1, "-1: default behaviour, 0 Disables, 1 Enables support for 64KB pages for driver allocated fine grain svm buffers")
 DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeND, true, "Enables diffrent algorithm to compute local work size")
 DECLARE_DEBUG_VARIABLE(bool, EnableComputeWorkSizeSquared, false, "Enables algorithm to compute the most squared work group as possible")
+DECLARE_DEBUG_VARIABLE(bool, EnableVaLibCalls, true, "Enable cl-va sharing lib calls")
+DECLARE_DEBUG_VARIABLE(int32_t, Enable64kbpages, -1, "-1: default behaviour, 0 Disables, 1 Enables support for 64KB pages for driver allocated fine grain svm buffers")
 DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableKmdNotify, -1, "-1: dont override, 0: disable, 1: enable")
 DECLARE_DEBUG_VARIABLE(int32_t, OverrideKmdNotifyDelayMicroseconds, -1, "-1: dont override, 0: infinite timeout, >0: timeout in microseconds")
 DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableQuickKmdSleep, -1, "-1: dont override, 0: disable, 1: enable. It works only when Kmd Notify is enabled.")
 DECLARE_DEBUG_VARIABLE(int32_t, OverrideQuickKmdSleepDelayMicroseconds, -1, "-1: dont override, 0: infinite timeout, >0: timeout in microseconds")
 DECLARE_DEBUG_VARIABLE(int32_t, OverrideEnableQuickKmdSleepForSporadicWaits, -1, "-1: dont override, 0: disable, 1: enable. It works only when QuickKmdSleep is enabled.")
 DECLARE_DEBUG_VARIABLE(int32_t, OverrideDelayQuickKmdSleepForSporadicWaitsMicroseconds, -1, "-1: dont override, >0: timeout in microseconds")
-DECLARE_DEBUG_VARIABLE(bool, EnableVaLibCalls, true, "Enable cl-va sharing lib calls")
 DECLARE_DEBUG_VARIABLE(int32_t, CsrDispatchMode, 0, "Chooses DispatchMode for Csr")
 /*DRIVER TOGGLES*/
 DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version")
 DECLARE_DEBUG_VARIABLE(int32_t, ForcePreemptionMode, -1, "Keep this variable in sync with PreemptionMode enum. -1 - devices default mode, 1 - disable, 2 - midBatch, 3 - threadGroup, 4 - midThread")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceWddmInterfaceVersion, 0, "Windows only. Force internal interface version. 0 is default value. Example: set 20 to force 2.0")
-DECLARE_DEBUG_VARIABLE(bool, HwQueueSupported, false, "Windows only. Pass flag to KMD during Wddm Context creation")
 DECLARE_DEBUG_VARIABLE(int32_t, NodeOrdinal, -1, "-1: default do not override, 0: ENGINE_RCS")
-DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger")
 DECLARE_DEBUG_VARIABLE(int32_t, OverrideThreadArbitrationPolicy, -1, "-1 (dont override) or any valid config (0: Age Based, 1: Round Robin)")
+DECLARE_DEBUG_VARIABLE(bool, HwQueueSupported, false, "Windows only. Pass flag to KMD during Wddm Context creation")
+DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger")
diff --git a/unit_tests/mem_obj/zero_copy_tests.cpp b/unit_tests/mem_obj/zero_copy_tests.cpp
index 1227e6cca2..4663c1aefb 100644
--- a/unit_tests/mem_obj/zero_copy_tests.cpp
+++ b/unit_tests/mem_obj/zero_copy_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, Intel Corporation
+ * Copyright (c) 2017 - 2018, Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -80,7 +80,7 @@ std::tuple Inputs[] = {std::make_tup
     std::make_tuple((cl_mem_flags)CL_MEM_COPY_HOST_PTR, CacheLinedMisAlignedSize, MemoryConstants::preferredAlignment, CacheLinedMisAlignedSize, true, false),
     std::make_tuple((cl_mem_flags)NULL, 0, 0, CacheLinedMisAlignedSize, true, false),
     std::make_tuple((cl_mem_flags)NULL, 0, 0, CacheLinedAlignedSize, true, true)};
-//clang-format on
+// clang-format on
 
 TEST_P(ZeroCopyBufferTest, CheckCacheAlignedPointerResultsInZeroCopy) {
 
@@ -104,10 +104,9 @@ TEST_P(ZeroCopyBufferTest, CheckCacheAlignedPointerResultsInZeroCopy) {
 
     EXPECT_NE(nullptr, buffer->getCpuAddress());
 
-        //check if buffer always have properly aligned storage ( PAGE )
+    // check that the buffer storage is always cache line aligned
     EXPECT_EQ(alignUp(buffer->getCpuAddress(), MemoryConstants::cacheLineSize), buffer->getCpuAddress());
 
-
     delete buffer;
 }
 
@@ -119,52 +118,68 @@ INSTANTIATE_TEST_CASE_P(
 TEST(ZeroCopyBufferTestWithSharedContext, GivenContextThatIsSharedWhenAskedForBufferCreationThenAlwaysResultsInZeroCopy) {
 
     MockContext context;
-    auto host_ptr = (void*)0x1001;
+    auto host_ptr = reinterpret_cast<void *>(0x1001);
     auto size = 64;
     auto retVal = CL_SUCCESS;
 
     context.isSharedContext = true;
-    auto buffer = Buffer::create(
-        &context,
-        CL_MEM_USE_HOST_PTR,
-        size,
-        host_ptr,
-        retVal);
+    std::unique_ptr<Buffer> buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal));
 
     EXPECT_EQ(CL_SUCCESS, retVal);
     EXPECT_TRUE(buffer->isMemObjZeroCopy()) << "Zero Copy not handled properly";
-    if (buffer->getGraphicsAllocation()->is32BitAllocation == false)
-    {
-        EXPECT_EQ(host_ptr, (void*)buffer->getGraphicsAllocation()->getUnderlyingBuffer());
+    if (buffer->getGraphicsAllocation()->is32BitAllocation == false) {
+        EXPECT_EQ(host_ptr, buffer->getGraphicsAllocation()->getUnderlyingBuffer());
     }
-
-    delete buffer;
 }
 
-TEST(ZeroCopyBufferWith32BitAddressing, GivenDeviceSupporting32BitAddressingWhenAskedForBufferCreationFromHostPtrThenNonZeroCopyBufferIsReturned)
-{
+TEST(ZeroCopyBufferTestWithSharedContext, GivenContextThatIsSharedAndDisableZeroCopyFlagWhenAskedForBufferCreationThenAlwaysResultsInZeroCopy) {
+    DebugManagerStateRestore stateRestore;
+    DebugManager.flags.DisableZeroCopyForUseHostPtr.set(true);
+
+    MockContext context;
+    auto host_ptr = reinterpret_cast<void *>(0x1001);
+    auto size = 64;
+    auto retVal = CL_SUCCESS;
+
+    context.isSharedContext = true;
+    std::unique_ptr<Buffer> buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal));
+    EXPECT_EQ(CL_SUCCESS, retVal);
+    // With a shared context the buffer is expected to stay zero copy even though the flag is set.
+    EXPECT_TRUE(buffer->isMemObjZeroCopy());
+}
+
+TEST(ZeroCopyWithDebugFlag, GivenInputsThatWouldResultInZeroCopyAndDisableZeroCopyFlagWhenBufferIsCreatedThenNonZeroCopyBufferIsReturned) {
+    DebugManagerStateRestore stateRestore;
+    DebugManager.flags.DisableZeroCopyForUseHostPtr.set(true);
+    MockContext context;
+    auto host_ptr = reinterpret_cast<void *>(alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize));
+    auto size = MemoryConstants::pageSize;
+    auto retVal = CL_SUCCESS;
+
+    std::unique_ptr<Buffer> buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal));
+    EXPECT_EQ(CL_SUCCESS, retVal);
+    EXPECT_FALSE(buffer->isMemObjZeroCopy());
+    // Destroy the buffer before releasing the host memory it was created from.
+    buffer.reset();
+    alignedFree(host_ptr);
+}
+
+TEST(ZeroCopyBufferWith32BitAddressing, GivenDeviceSupporting32BitAddressingWhenAskedForBufferCreationFromHostPtrThenNonZeroCopyBufferIsReturned) {
     DebugManagerStateRestore dbgRestorer;
-    {
-        DebugManager.flags.Force32bitAddressing.set(true);
-        MockContext context;
-        auto host_ptr = (void*)alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize);
-        auto size = MemoryConstants::pageSize;
-        auto retVal = CL_SUCCESS;
+    DebugManager.flags.Force32bitAddressing.set(true);
+    MockContext context;
+    auto host_ptr = reinterpret_cast<void *>(alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize));
+    auto size = MemoryConstants::pageSize;
+    auto retVal = CL_SUCCESS;
 
-        auto buffer = Buffer::create(
-            &context,
-            CL_MEM_USE_HOST_PTR,
-            size,
-            host_ptr,
-            retVal);
-        EXPECT_EQ(CL_SUCCESS, retVal);
+    std::unique_ptr<Buffer> buffer(Buffer::create(&context, CL_MEM_USE_HOST_PTR, size, host_ptr, retVal));
+    EXPECT_EQ(CL_SUCCESS, retVal);
 
-        EXPECT_TRUE(buffer->isMemObjZeroCopy());
-        if( is64bit) {
-            EXPECT_TRUE(buffer->getGraphicsAllocation()->is32BitAllocation);
-        }
-        delete buffer;
-        alignedFree(host_ptr);
-        DebugManager.flags.Force32bitAddressing.set(false);
+    EXPECT_TRUE(buffer->isMemObjZeroCopy());
+    if (is64bit) {
+        EXPECT_TRUE(buffer->getGraphicsAllocation()->is32BitAllocation);
     }
+    // Destroy the buffer before releasing the host memory it was created from.
+    buffer.reset();
+    alignedFree(host_ptr);
 }
diff --git a/unit_tests/test_files/igdrcl.config b/unit_tests/test_files/igdrcl.config
index 4e4d31f3ff..48acafb84e 100644
--- a/unit_tests/test_files/igdrcl.config
+++ b/unit_tests/test_files/igdrcl.config
@@ -63,3 +63,4 @@ PrintDispatchParameters = false
 AddPatchInfoCommentsForAUBDump = false
 HwQueueSupported = false
 ForceWddmInterfaceVersion = 0
+DisableZeroCopyForUseHostPtr = false