Add multithreaded enqueue blit OOQ test

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2022-03-04 09:46:29 +00:00
committed by Compute-Runtime-Automation
parent 1a823356a3
commit f91ae9d59c
3 changed files with 84 additions and 6 deletions

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2018-2021 Intel Corporation * Copyright (C) 2018-2022 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@@ -52,8 +52,8 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
*eventsRequest.outEvent = outEventObj; *eventsRequest.outEvent = outEventObj;
} }
auto commandStreamReceieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
TakeOwnershipWrapper<CommandQueue> queueOwnership(*this); TakeOwnershipWrapper<CommandQueue> queueOwnership(*this);
auto commandStreamReceieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
auto blockQueue = false; auto blockQueue = false;
auto taskLevel = 0u; auto taskLevel = 0u;
@@ -80,8 +80,8 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
eventBuilder); eventBuilder);
} }
queueOwnership.unlock();
commandStreamReceieverOwnership.unlock(); commandStreamReceieverOwnership.unlock();
queueOwnership.unlock();
// read/write buffers are always blocking // read/write buffers are always blocking
if (!blockQueue || transferProperties.blocking) { if (!blockQueue || transferProperties.blocking) {

View File

@@ -127,7 +127,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
TagNodeBase *hwTimeStamps = nullptr; TagNodeBase *hwTimeStamps = nullptr;
CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver(); CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver();
auto commandStreamReceiverOwnership = computeCommandStreamReceiver.obtainUniqueOwnership();
EventBuilder eventBuilder; EventBuilder eventBuilder;
setupEvent(eventBuilder, event, commandType); setupEvent(eventBuilder, event, commandType);
@@ -137,6 +136,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
std::unique_ptr<KernelOperation> blockedCommandsData; std::unique_ptr<KernelOperation> blockedCommandsData;
std::unique_ptr<PrintfHandler> printfHandler; std::unique_ptr<PrintfHandler> printfHandler;
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this); TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
auto commandStreamReceiverOwnership = computeCommandStreamReceiver.obtainUniqueOwnership();
auto blockQueue = false; auto blockQueue = false;
auto taskLevel = 0u; auto taskLevel = 0u;
@@ -353,8 +353,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
timestampPacketDependencies.moveNodesToNewContainer(*deferredTimestampPackets); timestampPacketDependencies.moveNodesToNewContainer(*deferredTimestampPackets);
} }
queueOwnership.unlock();
commandStreamReceiverOwnership.unlock(); commandStreamReceiverOwnership.unlock();
queueOwnership.unlock();
if (blocking) { if (blocking) {
auto &builtinOpParams = multiDispatchInfo.peekBuiltinOpParams(); auto &builtinOpParams = multiDispatchInfo.peekBuiltinOpParams();
@@ -950,7 +950,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
CompletionStamp completionStamp = {this->taskCount, this->taskLevel, this->flushStamp->peekStamp()}; CompletionStamp completionStamp = {this->taskCount, this->taskLevel, this->flushStamp->peekStamp()};
bool flushGpgpuCsr = true; bool flushGpgpuCsr = true;
if ((enqueueProperties.operation == EnqueueProperties::Operation::Blit) && !isGpgpuSubmissionForBcsRequired(false, timestampPacketDependencies)) { if ((enqueueProperties.operation == EnqueueProperties::Operation::Blit) && commandStream == nullptr) {
flushGpgpuCsr = false; flushGpgpuCsr = false;
} else { } else {
csrDeps.makeResident(getGpgpuCommandStreamReceiver()); csrDeps.makeResident(getGpgpuCommandStreamReceiver());
@@ -1155,6 +1155,10 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
if (blockQueue) { if (blockQueue) {
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, nullptr, &bcsCsr); enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, nullptr, &bcsCsr);
if (gpgpuSubmission) {
commandStreamReceiverOwnership.unlock();
}
} }
timestampPacketDependencies.moveNodesToNewContainer(*deferredTimestampPackets); timestampPacketDependencies.moveNodesToNewContainer(*deferredTimestampPackets);

View File

@@ -8,6 +8,8 @@
#include "opencl/test/unit_test/command_queue/enqueue_fixture.h" #include "opencl/test/unit_test/command_queue/enqueue_fixture.h"
#include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h"
#include <future>
using namespace NEO; using namespace NEO;
struct OOQFixtureFactory : public HelloWorldFixtureFactory { struct OOQFixtureFactory : public HelloWorldFixtureFactory {
@@ -89,4 +91,76 @@ TEST_F(OOQTaskTestsMt, GivenBlockedOnUserEventWhenEnqueingMarkerThenSuccessIsRet
retVal = clReleaseEvent(userEvent); retVal = clReleaseEvent(userEvent);
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal);
}
// Multithreaded stress test: numThreads workers simultaneously issue a blocking
// write-buffer, a kernel and a blocking read-buffer enqueue on one out-of-order
// queue. Every enqueue must return CL_SUCCESS, and the BCS task count must end
// up at two per worker.
TEST_F(OOQTaskTestsMt, givenBlitterWhenEnqueueCopyAndKernelUsingMultipleThreadsThenSuccessReturned) {
    auto hwInfo = *defaultHwInfo;
    hwInfo.capabilityTable.blitterOperationsSupported = true;
    REQUIRE_FULL_BLITTER_OR_SKIP(&hwInfo);

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
    DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0);

    constexpr uint32_t numThreads = 32;
    // Counts workers that have not yet reached the rendezvous point.
    std::atomic_uint32_t pendingWorkers = numThreads;
    std::array<std::future<void>, numThreads> workers;

    auto device = MockClDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, rootDeviceIndex);
    MockClDevice clDevice(device);
    auto cmdQ = createCommandQueue(&clDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);

    // Nothing has been submitted yet on either engine.
    EXPECT_EQ(cmdQ->taskCount, 0u);
    EXPECT_EQ(cmdQ->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u);
    EXPECT_EQ(cmdQ->peekBcsTaskCount(aub_stream::EngineType::ENGINE_BCS), 0u);

    auto buffer = std::unique_ptr<Buffer>(BufferHelper<>::create());

    auto workerBody = [&]() {
        auto hostPtr = alignedMalloc(BufferDefaults::sizeInBytes, MemoryConstants::cacheLineSize);

        // Spin until every worker has checked in, so the enqueues below start together.
        pendingWorkers.fetch_sub(1u);
        while (pendingWorkers.load() != 0u) {
            std::this_thread::yield();
        }

        auto status = EnqueueWriteBufferHelper<>::enqueueWriteBuffer(cmdQ,
                                                                     buffer.get(),
                                                                     CL_TRUE,
                                                                     0,
                                                                     BufferDefaults::sizeInBytes,
                                                                     hostPtr,
                                                                     nullptr,
                                                                     0,
                                                                     nullptr,
                                                                     nullptr);
        EXPECT_EQ(CL_SUCCESS, status);

        // The same one-element array serves as both the global and the local work size.
        size_t workDim[] = {64};
        status = EnqueueKernelHelper<>::enqueueKernel(cmdQ, KernelFixture::pKernel, 1, nullptr, workDim, workDim, 0, nullptr, nullptr);
        EXPECT_EQ(CL_SUCCESS, status);

        status = EnqueueReadBufferHelper<>::enqueueReadBuffer(cmdQ,
                                                              buffer.get(),
                                                              CL_TRUE,
                                                              0,
                                                              BufferDefaults::sizeInBytes,
                                                              hostPtr,
                                                              nullptr,
                                                              0,
                                                              nullptr,
                                                              nullptr);
        EXPECT_EQ(CL_SUCCESS, status);

        alignedFree(hostPtr);
    };

    for (auto &worker : workers) {
        worker = std::async(std::launch::async, workerBody);
    }

    // Wait for every worker to finish before inspecting the counters.
    for (auto &worker : workers) {
        worker.get();
    }

    EXPECT_NE(cmdQ->taskCount, 0u);
    EXPECT_NE(cmdQ->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u);
    // One write plus one read per worker.
    EXPECT_EQ(cmdQ->peekBcsTaskCount(aub_stream::EngineType::ENGINE_BCS), 2 * numThreads);

    clReleaseCommandQueue(cmdQ);
}