mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-22 10:17:01 +08:00
Add multithread enqueue blit OOQ test
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
1a823356a3
commit
f91ae9d59c
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2021 Intel Corporation
|
* Copyright (C) 2018-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -52,8 +52,8 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
|
|||||||
*eventsRequest.outEvent = outEventObj;
|
*eventsRequest.outEvent = outEventObj;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto commandStreamReceieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
|
|
||||||
TakeOwnershipWrapper<CommandQueue> queueOwnership(*this);
|
TakeOwnershipWrapper<CommandQueue> queueOwnership(*this);
|
||||||
|
auto commandStreamReceieverOwnership = getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
|
||||||
|
|
||||||
auto blockQueue = false;
|
auto blockQueue = false;
|
||||||
auto taskLevel = 0u;
|
auto taskLevel = 0u;
|
||||||
@@ -80,8 +80,8 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
|
|||||||
eventBuilder);
|
eventBuilder);
|
||||||
}
|
}
|
||||||
|
|
||||||
queueOwnership.unlock();
|
|
||||||
commandStreamReceieverOwnership.unlock();
|
commandStreamReceieverOwnership.unlock();
|
||||||
|
queueOwnership.unlock();
|
||||||
|
|
||||||
// read/write buffers are always blocking
|
// read/write buffers are always blocking
|
||||||
if (!blockQueue || transferProperties.blocking) {
|
if (!blockQueue || transferProperties.blocking) {
|
||||||
|
|||||||
@@ -127,7 +127,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||||||
|
|
||||||
TagNodeBase *hwTimeStamps = nullptr;
|
TagNodeBase *hwTimeStamps = nullptr;
|
||||||
CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver();
|
CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver();
|
||||||
auto commandStreamReceiverOwnership = computeCommandStreamReceiver.obtainUniqueOwnership();
|
|
||||||
|
|
||||||
EventBuilder eventBuilder;
|
EventBuilder eventBuilder;
|
||||||
setupEvent(eventBuilder, event, commandType);
|
setupEvent(eventBuilder, event, commandType);
|
||||||
@@ -137,6 +136,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||||||
std::unique_ptr<KernelOperation> blockedCommandsData;
|
std::unique_ptr<KernelOperation> blockedCommandsData;
|
||||||
std::unique_ptr<PrintfHandler> printfHandler;
|
std::unique_ptr<PrintfHandler> printfHandler;
|
||||||
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
||||||
|
auto commandStreamReceiverOwnership = computeCommandStreamReceiver.obtainUniqueOwnership();
|
||||||
|
|
||||||
auto blockQueue = false;
|
auto blockQueue = false;
|
||||||
auto taskLevel = 0u;
|
auto taskLevel = 0u;
|
||||||
@@ -353,8 +353,8 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||||||
timestampPacketDependencies.moveNodesToNewContainer(*deferredTimestampPackets);
|
timestampPacketDependencies.moveNodesToNewContainer(*deferredTimestampPackets);
|
||||||
}
|
}
|
||||||
|
|
||||||
queueOwnership.unlock();
|
|
||||||
commandStreamReceiverOwnership.unlock();
|
commandStreamReceiverOwnership.unlock();
|
||||||
|
queueOwnership.unlock();
|
||||||
|
|
||||||
if (blocking) {
|
if (blocking) {
|
||||||
auto &builtinOpParams = multiDispatchInfo.peekBuiltinOpParams();
|
auto &builtinOpParams = multiDispatchInfo.peekBuiltinOpParams();
|
||||||
@@ -950,7 +950,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
|||||||
CompletionStamp completionStamp = {this->taskCount, this->taskLevel, this->flushStamp->peekStamp()};
|
CompletionStamp completionStamp = {this->taskCount, this->taskLevel, this->flushStamp->peekStamp()};
|
||||||
bool flushGpgpuCsr = true;
|
bool flushGpgpuCsr = true;
|
||||||
|
|
||||||
if ((enqueueProperties.operation == EnqueueProperties::Operation::Blit) && !isGpgpuSubmissionForBcsRequired(false, timestampPacketDependencies)) {
|
if ((enqueueProperties.operation == EnqueueProperties::Operation::Blit) && commandStream == nullptr) {
|
||||||
flushGpgpuCsr = false;
|
flushGpgpuCsr = false;
|
||||||
} else {
|
} else {
|
||||||
csrDeps.makeResident(getGpgpuCommandStreamReceiver());
|
csrDeps.makeResident(getGpgpuCommandStreamReceiver());
|
||||||
@@ -1155,6 +1155,10 @@ void CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispat
|
|||||||
|
|
||||||
if (blockQueue) {
|
if (blockQueue) {
|
||||||
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, nullptr, &bcsCsr);
|
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, nullptr, &bcsCsr);
|
||||||
|
|
||||||
|
if (gpgpuSubmission) {
|
||||||
|
commandStreamReceiverOwnership.unlock();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
timestampPacketDependencies.moveNodesToNewContainer(*deferredTimestampPackets);
|
timestampPacketDependencies.moveNodesToNewContainer(*deferredTimestampPackets);
|
||||||
|
|||||||
@@ -8,6 +8,8 @@
|
|||||||
#include "opencl/test/unit_test/command_queue/enqueue_fixture.h"
|
#include "opencl/test/unit_test/command_queue/enqueue_fixture.h"
|
||||||
#include "opencl/test/unit_test/fixtures/hello_world_fixture.h"
|
#include "opencl/test/unit_test/fixtures/hello_world_fixture.h"
|
||||||
|
|
||||||
|
#include <future>
|
||||||
|
|
||||||
using namespace NEO;
|
using namespace NEO;
|
||||||
|
|
||||||
struct OOQFixtureFactory : public HelloWorldFixtureFactory {
|
struct OOQFixtureFactory : public HelloWorldFixtureFactory {
|
||||||
@@ -89,4 +91,76 @@ TEST_F(OOQTaskTestsMt, GivenBlockedOnUserEventWhenEnqueingMarkerThenSuccessIsRet
|
|||||||
|
|
||||||
retVal = clReleaseEvent(userEvent);
|
retVal = clReleaseEvent(userEvent);
|
||||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Multithreaded test: numThreads asynchronous tasks share a single command queue created with
// CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE and one buffer. Each task performs a blocking write,
// a kernel enqueue and a blocking read, and every call is expected to return CL_SUCCESS.
// Afterwards the queue and GPGPU CSR task counts must be non-zero and the BCS task count
// must equal 2 * numThreads (one write plus one read per task).
TEST_F(OOQTaskTestsMt, givenBlitterWhenEnqueueCopyAndKernelUsingMultipleThreadsThenSuccessReturned) {
|
||||||
|
// Work on a local copy of the default hardware info with blitter support switched on.
auto hwInfo = *defaultHwInfo;
|
||||||
|
hwInfo.capabilityTable.blitterOperationsSupported = true;
|
||||||
|
// NOTE(review): judging by its name, this macro skips the test when the platform lacks a full blitter - confirm.
REQUIRE_FULL_BLITTER_OR_SKIP(&hwInfo);
|
||||||
|
|
||||||
|
// NOTE(review): presumably restores the debug flags modified below when it goes out of scope - confirm.
DebugManagerStateRestore restorer;
|
||||||
|
// Enable the blitter for enqueue operations and set both CPU-copy flags to 0;
// presumably this routes the buffer transfers below through the blitter - verify against flag semantics.
DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
|
||||||
|
DebugManager.flags.DoCpuCopyOnReadBuffer.set(0);
|
||||||
|
DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0);
|
||||||
|
|
||||||
|
constexpr uint32_t numThreads = 32;
|
||||||
|
// Countdown used as a start gate: it reaches zero once every task has checked in.
std::atomic_uint32_t barrier = numThreads;
|
||||||
|
std::array<std::future<void>, numThreads> threads;
|
||||||
|
|
||||||
|
auto device = MockClDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, rootDeviceIndex);
|
||||||
|
MockClDevice clDevice(device);
|
||||||
|
auto cmdQ = createCommandQueue(&clDevice, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
|
||||||
|
// Baseline: nothing has been submitted yet on the queue, the GPGPU CSR or the BCS engine.
EXPECT_EQ(cmdQ->taskCount, 0u);
|
||||||
|
EXPECT_EQ(cmdQ->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u);
|
||||||
|
EXPECT_EQ(cmdQ->peekBcsTaskCount(aub_stream::EngineType::ENGINE_BCS), 0u);
|
||||||
|
// A single buffer shared by all tasks.
auto buffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
|
||||||
|
|
||||||
|
for (auto &thread : threads) {
|
||||||
|
// The lambda captures the test's locals by reference; they stay alive because every future is waited on below.
thread = std::async(std::launch::async, [&]() {
|
||||||
|
// Per-task host allocation, used as the source of the write and the destination of the read.
auto alignedReadPtr = alignedMalloc(BufferDefaults::sizeInBytes, MemoryConstants::cacheLineSize);
|
||||||
|
// Check in, then spin until all numThreads tasks have checked in so the enqueues start together.
barrier.fetch_sub(1u);
|
||||||
|
while (barrier.load() != 0u) {
|
||||||
|
std::this_thread::yield();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Blocking (CL_TRUE) write of the whole buffer from the host allocation.
auto retVal = EnqueueWriteBufferHelper<>::enqueueWriteBuffer(cmdQ,
|
||||||
|
buffer.get(),
|
||||||
|
CL_TRUE,
|
||||||
|
0,
|
||||||
|
BufferDefaults::sizeInBytes,
|
||||||
|
alignedReadPtr,
|
||||||
|
nullptr,
|
||||||
|
0,
|
||||||
|
nullptr,
|
||||||
|
nullptr);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
|
||||||
|
// One-dimensional kernel enqueue; the same 64-element array is passed as both global and local work size.
size_t workSize[] = {64};
|
||||||
|
retVal = EnqueueKernelHelper<>::enqueueKernel(cmdQ, KernelFixture::pKernel, 1, nullptr, workSize, workSize, 0, nullptr, nullptr);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
|
||||||
|
// Blocking (CL_TRUE) read of the whole buffer back into the same host allocation.
retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(cmdQ,
|
||||||
|
buffer.get(),
|
||||||
|
CL_TRUE,
|
||||||
|
0,
|
||||||
|
BufferDefaults::sizeInBytes,
|
||||||
|
alignedReadPtr,
|
||||||
|
nullptr,
|
||||||
|
0,
|
||||||
|
nullptr,
|
||||||
|
nullptr);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
|
||||||
|
alignedFree(alignedReadPtr);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
// Wait for every task to finish before inspecting the counters.
for (auto &thread : threads) {
|
||||||
|
thread.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Work was submitted on the queue and the GPGPU CSR; the BCS count must match one write plus one read per task.
EXPECT_NE(cmdQ->taskCount, 0u);
|
||||||
|
EXPECT_NE(cmdQ->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u);
|
||||||
|
EXPECT_EQ(cmdQ->peekBcsTaskCount(aub_stream::EngineType::ENGINE_BCS), 2 * numThreads);
|
||||||
|
|
||||||
|
clReleaseCommandQueue(cmdQ);
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user