diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index 9b11df4e22..c741dd888f 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -411,3 +411,4 @@ DirectSubmissionReadBackRingBuffer = -1 ReadBackCommandBufferAllocation = -1 PrintImageBlitBlockCopyCmdDetails = 0 DirectSubmissionInsertExtraMiMemFenceCommands = -1 +DirectSubmissionInsertSfenceInstructionPriorToSubmission = -1 diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 2fc75eab81..c6b8ab0347 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -291,6 +291,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableRingSwitchTagUpdateWa, -1, "-1: default, 0 DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionReadBackCommandBuffer, -1, "-1: default - disabled, 0 - disable, 1 - enable. If enabled, read first dword of cmd buffer after handling residency.") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionReadBackRingBuffer, -1, "-1: default - disabled, 0 - disable, 1 - enable. If enabled, read first dword of ring buffer after handling residency.") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertExtraMiMemFenceCommands, -1, "-1: default, 0 - disable, 1 - enable. 
If enabled, add extra MI_MEM_FENCE instructions with acquire bit set") +DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertSfenceInstructionPriorToSubmission, -1, "-1: default, 0 - disable, 1 - Insert _mm_sfence before unlocking semaphore only, 2 - insert before and after semaphore") /* IMPLICIT SCALING */ DECLARE_DEBUG_VARIABLE(int32_t, EnableWalkerPartition, -1, "-1: default, 0: disable, 1: enable, Enables Walker Partitioning via WPARID.") diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index 4e3460ce84..693d061787 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -435,8 +435,17 @@ bool DirectSubmissionHw::dispatchCommandBuffer(BatchBuffe reserved = *ringBufferStart; } + if (DebugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.get() >= 1) { + CpuIntrinsics::sfence(); + } + //unblock GPU semaphoreData->QueueWorkCount = currentQueueWorkCount; + + if (DebugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.get() == 2) { + CpuIntrinsics::sfence(); + } + cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize); currentQueueWorkCount++; DirectSubmissionDiagnostics::diagnosticModeOneSubmit(diagnostic.get()); diff --git a/shared/source/utilities/cpuintrinsics.cpp b/shared/source/utilities/cpuintrinsics.cpp index dead04e570..dab9994447 100644 --- a/shared/source/utilities/cpuintrinsics.cpp +++ b/shared/source/utilities/cpuintrinsics.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -20,6 +20,10 @@ void clFlush(void const *ptr) { _mm_clflush(ptr); } +void sfence() { + _mm_sfence(); +} + void pause() { _mm_pause(); } diff --git a/shared/source/utilities/cpuintrinsics.h b/shared/source/utilities/cpuintrinsics.h index 07cc1185ac..ce9fcee830 100644 --- 
a/shared/source/utilities/cpuintrinsics.h +++ b/shared/source/utilities/cpuintrinsics.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -10,6 +10,8 @@ namespace NEO { namespace CpuIntrinsics { +void sfence(); + void clFlush(void const *ptr); void pause(); diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp index 98b19f1627..94c5e0250d 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp @@ -10,6 +10,7 @@ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/direct_submission/dispatchers/render_dispatcher.h" #include "shared/source/helpers/flush_stamp.h" +#include "shared/source/utilities/cpuintrinsics.h" #include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/fixtures/direct_submission_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" @@ -23,6 +24,10 @@ #include "shared/test/common/mocks/mock_io_functions.h" #include "shared/test/common/test_macros/test.h" +namespace CpuIntrinsicsTests { +extern std::atomic sfenceCounter; +} // namespace CpuIntrinsicsTests + using DirectSubmissionTest = Test; using DirectSubmissionDispatchBufferTest = Test; @@ -699,3 +704,27 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, auto expectedValue = reinterpret_cast(directSubmission.ringCommandStream.getSpace(0))[0]; EXPECT_EQ(expectedValue, directSubmission.reserved); } + +HWTEST_F(DirectSubmissionDispatchBufferTest, givenDebugFlagSetWhenDispatchingWorkloadThenProgramSfenceInstruction) { + DebugManagerStateRestore restorer{}; + + DebugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.set(1); + using Dispatcher = BlitterDispatcher; + + FlushStampTracker flushStamp(true); + + 
MockDirectSubmissionHw directSubmission(*pDevice, *osContext.get()); + EXPECT_TRUE(directSubmission.initialize(true, true)); + + auto initialCounterValue = CpuIntrinsicsTests::sfenceCounter.load(); + + EXPECT_TRUE(directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp)); + + EXPECT_EQ(initialCounterValue + 1, CpuIntrinsicsTests::sfenceCounter); + + DebugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.set(2); + + EXPECT_TRUE(directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp)); + + EXPECT_EQ(initialCounterValue + 3, CpuIntrinsicsTests::sfenceCounter); +} diff --git a/shared/test/unit_test/utilities/cpuintrinsics.cpp b/shared/test/unit_test/utilities/cpuintrinsics.cpp index 420ef5f416..1b34ea7965 100644 --- a/shared/test/unit_test/utilities/cpuintrinsics.cpp +++ b/shared/test/unit_test/utilities/cpuintrinsics.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -18,6 +18,7 @@ namespace CpuIntrinsicsTests { std::atomic lastClFlushedPtr(0u); std::atomic clFlushCounter(0u); std::atomic pauseCounter(0u); +std::atomic sfenceCounter(0u); volatile uint32_t *pauseAddress = nullptr; uint32_t pauseValue = 0u; @@ -34,6 +35,10 @@ void clFlush(void const *ptr) { CpuIntrinsicsTests::lastClFlushedPtr = reinterpret_cast(ptr); } +void sfence() { + CpuIntrinsicsTests::sfenceCounter++; +} + void pause() { CpuIntrinsicsTests::pauseCounter++; if (CpuIntrinsicsTests::pauseAddress != nullptr) { diff --git a/shared/test/unit_test/utilities/cpuintrinsics_tests.cpp b/shared/test/unit_test/utilities/cpuintrinsics_tests.cpp index 58d5c50a7d..7d06d0186c 100644 --- a/shared/test/unit_test/utilities/cpuintrinsics_tests.cpp +++ b/shared/test/unit_test/utilities/cpuintrinsics_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -15,6 +15,7 @@ namespace 
CpuIntrinsicsTests { extern std::atomic lastClFlushedPtr; extern std::atomic pauseCounter; +extern std::atomic sfenceCounter; } // namespace CpuIntrinsicsTests TEST(CpuIntrinsicsTest, whenClFlushIsCalledThenExpectToPassPtrToSystemCall) { @@ -29,3 +30,9 @@ TEST(CpuIntrinsicsTest, whenPauseCalledThenExpectToIncreaseCounter) { NEO::CpuIntrinsics::pause(); EXPECT_EQ(oldCount + 1, CpuIntrinsicsTests::pauseCounter); } + +TEST(CpuIntrinsicsTest, whenSfenceCalledThenExpectToIncreaseCounter) { + uint32_t oldCount = CpuIntrinsicsTests::sfenceCounter.load(); + NEO::CpuIntrinsics::sfence(); + EXPECT_EQ(oldCount + 1, CpuIntrinsicsTests::sfenceCounter); +} \ No newline at end of file