From 0919cad8858075b5271abd7a32471205d3bc7e24 Mon Sep 17 00:00:00 2001
From: Lukasz Jobczyk
Date: Wed, 2 Mar 2022 13:42:34 +0000
Subject: [PATCH] Add multithread enqueue blit OOQ test

Signed-off-by: Lukasz Jobczyk
---
Review notes (text in this section is ignored by `git am`):

* enqueue_common.h: obtainUniqueOwnership() on the compute CSR is moved so
  that it runs after the queue's TakeOwnershipWrapper has been constructed,
  i.e. the queue is taken first and the CSR second. Presumably this removes
  a lock-order problem between concurrent enqueues; confirm with the author.
* ooq_task_tests_mt.cpp: the new test launches 32 std::async tasks on one
  out-of-order queue. Each task decrements a shared atomic counter, spins
  until it reaches zero, then issues a buffer write, a kernel and a buffer
  read (CL_TRUE is passed as the third argument of both copies). Afterwards
  the test expects two BCS tasks per thread.
* Changes against the original submission: EXPECT_EQ/EXPECT_NE in the new
  test use the (expected, actual) order already used in this file, and the
  return value of clReleaseCommandQueue() is now checked. The number of
  added lines is unchanged, so the hunk headers and diffstat still match.
* Tokens inside angle brackets were lost when this patch was extracted.
  <future> and std::future<void> follow from the code itself; the template
  arguments GfxFamily, KernelOperation, PrintfHandler, MockDevice and Buffer
  were restored from memory of the code base and need to be verified against
  the tree. The author e-mail addresses could not be recovered.
* Possible follow-up: alignedReadPtr is handed to enqueueWriteBuffer directly
  after alignedMalloc. If alignedMalloc does not zero the allocation, the
  write reads indeterminate bytes; consider filling the buffer first.

 opencl/source/command_queue/enqueue_common.h  |  2 +-
 .../command_queue/ooq_task_tests_mt.cpp       | 76 ++++++++++++++++++-
 2 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h
index 625d3436c0..600e3d45a3 100644
--- a/opencl/source/command_queue/enqueue_common.h
+++ b/opencl/source/command_queue/enqueue_common.h
@@ -127,7 +127,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
     TagNodeBase *hwTimeStamps = nullptr;
 
     CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver();
-    auto commandStreamReceiverOwnership = computeCommandStreamReceiver.obtainUniqueOwnership();
 
     EventBuilder eventBuilder;
     setupEvent(eventBuilder, event, commandType);
@@ -137,6 +136,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
     std::unique_ptr<KernelOperation> blockedCommandsData;
     std::unique_ptr<PrintfHandler> printfHandler;
     TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
+    auto commandStreamReceiverOwnership = computeCommandStreamReceiver.obtainUniqueOwnership();
 
     auto blockQueue = false;
     auto taskLevel = 0u;
diff --git a/opencl/test/unit_test/mt_tests/command_queue/ooq_task_tests_mt.cpp b/opencl/test/unit_test/mt_tests/command_queue/ooq_task_tests_mt.cpp
index 1973d8f3b4..d4e34cd2f2 100644
--- a/opencl/test/unit_test/mt_tests/command_queue/ooq_task_tests_mt.cpp
+++ b/opencl/test/unit_test/mt_tests/command_queue/ooq_task_tests_mt.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -8,6 +8,8 @@
 #include "opencl/test/unit_test/command_queue/enqueue_fixture.h"
 #include "opencl/test/unit_test/fixtures/hello_world_fixture.h"
 
+#include <future>
+
 using namespace NEO;
 
 struct OOQFixtureFactory : public HelloWorldFixtureFactory {
@@ -89,4 +91,76 @@ TEST_F(OOQTaskTestsMt, GivenBlockedOnUserEventWhenEnqueingMarkerThenSuccessIsRet
 
     retVal = clReleaseEvent(userEvent);
     EXPECT_EQ(CL_SUCCESS, retVal);
+}
+
+TEST_F(OOQTaskTestsMt, givenBlitterWhenEnqueueCopyAndKernelUsingMultipleThreadsThenSuccessReturned) {
+    auto hwInfo = *defaultHwInfo;
+    hwInfo.capabilityTable.blitterOperationsSupported = true;
+    REQUIRE_FULL_BLITTER_OR_SKIP(&hwInfo);
+
+    DebugManagerStateRestore restorer;
+    DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
+    DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
+    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0);
+
+    constexpr uint32_t numThreads = 32;
+    std::atomic_uint32_t barrier = numThreads;
+    std::array<std::future<void>, numThreads> threads;
+
+    auto device = MockClDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, rootDeviceIndex);
+    MockClDevice clDevice(device);
+    auto cmdQ = createCommandQueue(&clDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
+    EXPECT_EQ(0u, cmdQ->taskCount);
+    EXPECT_EQ(0u, cmdQ->getGpgpuCommandStreamReceiver().peekTaskCount());
+    EXPECT_EQ(0u, cmdQ->peekBcsTaskCount(aub_stream::EngineType::ENGINE_BCS));
+    auto buffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
+
+    for (auto &thread : threads) {
+        thread = std::async(std::launch::async, [&]() {
+            auto alignedReadPtr = alignedMalloc(BufferDefaults::sizeInBytes, MemoryConstants::cacheLineSize);
+            barrier.fetch_sub(1u);
+            while (barrier.load() != 0u) {
+                std::this_thread::yield();
+            }
+
+            auto retVal = EnqueueWriteBufferHelper<>::enqueueWriteBuffer(cmdQ,
+                                                                         buffer.get(),
+                                                                         CL_TRUE,
+                                                                         0,
+                                                                         BufferDefaults::sizeInBytes,
+                                                                         alignedReadPtr,
+                                                                         nullptr,
+                                                                         0,
+                                                                         nullptr,
+                                                                         nullptr);
+            EXPECT_EQ(CL_SUCCESS, retVal);
+
+            size_t workSize[] = {64};
+            retVal = EnqueueKernelHelper<>::enqueueKernel(cmdQ, KernelFixture::pKernel, 1, nullptr, workSize, workSize, 0, nullptr, nullptr);
+            EXPECT_EQ(CL_SUCCESS, retVal);
+
+            retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(cmdQ,
+                                                                  buffer.get(),
+                                                                  CL_TRUE,
+                                                                  0,
+                                                                  BufferDefaults::sizeInBytes,
+                                                                  alignedReadPtr,
+                                                                  nullptr,
+                                                                  0,
+                                                                  nullptr,
+                                                                  nullptr);
+            EXPECT_EQ(CL_SUCCESS, retVal);
+
+            alignedFree(alignedReadPtr);
+        });
+    }
+    for (auto &thread : threads) {
+        thread.get();
+    }
+
+    EXPECT_NE(0u, cmdQ->taskCount);
+    EXPECT_NE(0u, cmdQ->getGpgpuCommandStreamReceiver().peekTaskCount());
+    EXPECT_EQ(2 * numThreads, cmdQ->peekBcsTaskCount(aub_stream::EngineType::ENGINE_BCS));
+
+    EXPECT_EQ(CL_SUCCESS, clReleaseCommandQueue(cmdQ));
 }
\ No newline at end of file