Track separate task count for each BCS in OpenCL CommandQueue

Related-To: NEO-6057
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
This commit is contained in:
Maciej Dziuban
2021-10-06 15:19:50 +00:00
committed by Compute-Runtime-Automation
parent 852b2f6e6e
commit 19a6c9b1d3
7 changed files with 45 additions and 28 deletions

View File

@ -87,7 +87,6 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr
auto &selectorCopyEngine = neoDevice.getSelectorCopyEngine();
auto bcsEngineType = EngineHelpers::getBcsEngineType(hwInfo, device->getDeviceBitfield(), selectorCopyEngine, internalUsage);
bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)] = neoDevice.tryGetEngine(bcsEngineType, EngineUsage::Regular);
bcsState.engineType = bcsEngineType;
}
}
@ -630,13 +629,15 @@ cl_uint CommandQueue::getQueueFamilyIndex() const {
}
// Records newBcsTaskCount as the latest task count for the copy engine (BCS) identified by
// bcsEngineType.
// NOTE(review): this text comes from a rendered diff without visible +/- markers, so the body
// appears to contain both the replaced statement (write to the single bcsState) and its
// replacement (write to the per-engine bcsStates slot) - confirm against the actual file.
void CommandQueue::updateBcsTaskCount(aub_stream::EngineType bcsEngineType, uint32_t newBcsTaskCount) {
// Guard: the engine type reported by getAnyBcs() must equal the requested bcsEngineType.
UNRECOVERABLE_IF(getAnyBcs()->getOsContext().getEngineType() != bcsEngineType);
// Presumably the pre-change line: stores the count in the single, engine-agnostic bcsState.
this->bcsState.taskCount = newBcsTaskCount;
// Select the slot belonging to this engine; getBcsIndex() supplies the array index.
CopyEngineState &state = bcsStates[EngineHelpers::getBcsIndex(bcsEngineType)];
// Stamp the slot with its engine type alongside the count.
// NOTE(review): presumably this is what makes state.isValid() report true later - confirm.
state.engineType = bcsEngineType;
state.taskCount = newBcsTaskCount;
}
// Reads back the task count stored for the copy engine (BCS) identified by bcsEngineType,
// without modifying any state (the method is const).
// NOTE(review): this text comes from a rendered diff without visible +/- markers, so the body
// appears to contain both the replaced return statement and the new per-engine lookup -
// confirm against the actual file.
uint32_t CommandQueue::peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const {
// Guard: the engine type reported by getAnyBcs() must equal the requested bcsEngineType.
UNRECOVERABLE_IF(getAnyBcs()->getOsContext().getEngineType() != bcsEngineType);
// NOTE(review): as written, this return exits before the per-engine lookup below, leaving the
// remaining statements unreachable; it is presumably the line the commit replaced - confirm.
return this->bcsState.taskCount;
// Look up the slot belonging to this engine; getBcsIndex() supplies the array index.
const CopyEngineState &state = bcsStates[EngineHelpers::getBcsIndex(bcsEngineType)];
// Flags a read of a slot whose isValid() is false.
// NOTE(review): DEBUG_BREAK_IF is assumed to be active only in debug builds - confirm.
DEBUG_BREAK_IF(!state.isValid());
return state.taskCount;
}
IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize) {
@ -887,7 +888,6 @@ void CommandQueue::overrideEngine(aub_stream::EngineType engineType, EngineUsage
if (isEngineCopyOnly) {
std::fill(bcsEngines.begin(), bcsEngines.end(), nullptr);
bcsEngines[EngineHelpers::getBcsIndex(engineType)] = &device->getEngine(engineType, EngineUsage::Regular);
bcsState.engineType = engineType;
timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
deferredTimestampPackets = std::make_unique<TimestampPacketContainer>();
isCopyOnly = true;
@ -927,8 +927,13 @@ void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHan
deferredTimestampPackets->swapNodes(nodesToRelease);
}
Range<CopyEngineState> states{&bcsState, bcsState.isValid() ? 1u : 0u};
waitUntilComplete(taskCount, states, flushStamp->peekStamp(), false);
StackVec<CopyEngineState, bcsInfoMaskSize> activeBcsStates{};
for (CopyEngineState &state : this->bcsStates) {
if (state.isValid()) {
activeBcsStates.push_back(state);
}
}
waitUntilComplete(taskCount, activeBcsStates, flushStamp->peekStamp(), false);
if (printfHandler) {
printfHandler->printEnqueueOutput();

View File

@ -380,7 +380,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
QueueThrottle throttle = QueueThrottle::MEDIUM;
EnqueueProperties::Operation latestSentEnqueueType = EnqueueProperties::Operation::None;
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
CopyEngineState bcsState = {};
std::array<CopyEngineState, bcsInfoMaskSize> bcsStates = {};
bool perfCountersEnabled = false;

View File

@ -261,17 +261,17 @@ HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectThrot
EXPECT_EQ(mockCmdQ->throttle, mockCsr->passedDispatchFlags.throttle);
}
HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKernelThenDoImplicitFlush) {
HWTEST_F(DispatchFlagsBlitTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKernelThenDoImplicitFlush) {
using CsrType = MockCsrHw2<FamilyType>;
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1);
DebugManager.flags.EnableTimestampPacket.set(1);
SetUpImpl<CsrType>();
REQUIRE_BLITTER_OR_SKIP(&device->getHardwareInfo());
auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
auto mockCsr = static_cast<CsrType *>(&mockCmdQ->getGpgpuCommandStreamReceiver());
mockCsr->skipBlitCalls = true;
mockCmdQ->clearBcsEngines();
mockCmdQ->bcsEngines[0] = mockCmdQ->gpgpuEngine;
cl_int retVal = CL_SUCCESS;
auto buffer = std::unique_ptr<Buffer>(Buffer::create(context.get(), 0, 1, nullptr, retVal));
auto &bcsCsr = *mockCmdQ->bcsEngines[0]->commandStreamReceiver;
@ -306,18 +306,17 @@ HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKerne
EXPECT_EQ(GrfConfig::NotApplicable, mockCsr->passedDispatchFlags.numGrfRequired);
}
HWTEST_F(DispatchFlagsTests, givenN1EnabledWhenDispatchingWithoutKernelThenAllowOutOfOrderExecution) {
HWTEST_F(DispatchFlagsBlitTests, givenN1EnabledWhenDispatchingWithoutKernelThenAllowOutOfOrderExecution) {
using CsrType = MockCsrHw2<FamilyType>;
DebugManager.flags.EnableTimestampPacket.set(1);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1);
SetUpImpl<CsrType>();
REQUIRE_BLITTER_OR_SKIP(&device->getHardwareInfo());
auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
auto mockCsr = static_cast<CsrType *>(&mockCmdQ->getGpgpuCommandStreamReceiver());
mockCsr->skipBlitCalls = true;
mockCmdQ->clearBcsEngines();
mockCmdQ->bcsEngines[0] = mockCmdQ->gpgpuEngine;
cl_int retVal = CL_SUCCESS;
auto buffer = std::unique_ptr<Buffer>(Buffer::create(context.get(), 0, 1, nullptr, retVal));
auto &bcsCsr = *mockCmdQ->bcsEngines[0]->commandStreamReceiver;

View File

@ -932,13 +932,11 @@ HWTEST_F(EnqueueAuxKernelTests, givenDebugVariableDisablingBuiltinTranslationWhe
pDevice->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
auto hwInfo = pDevice->getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]->getMutableHardwareInfo();
hwInfo->capabilityTable.blitterOperationsSupported = true;
REQUIRE_BLITTER_OR_SKIP(hwInfo);
MockKernelWithInternals mockKernel(*pClDevice, context);
MyCmdQ<FamilyType> cmdQ(context, pClDevice);
cmdQ.clearBcsEngines();
cmdQ.bcsEngines[0] = cmdQ.gpgpuEngine;
hwInfo->capabilityTable.blitterOperationsSupported = true;
size_t gws[3] = {1, 0, 0};
MockBuffer buffer;

View File

@ -11,16 +11,23 @@
#include "shared/test/common/mocks/mock_csr.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/test_macros/test_checks_shared.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
namespace NEO {
struct DispatchFlagsTests : public ::testing::Test {
template <bool setupBlitter>
struct DispatchFlagsTestsBase : public ::testing::Test {
// Creates the mock device and context used by the fixture, registering CsrType with
// environmentWrapper before the device is built.
// NOTE(review): this text comes from a rendered diff without visible +/- markers, so both the
// old and the new `device = ...` assignments appear below - confirm against the actual file.
template <typename CsrType>
void SetUpImpl() {
// Work on a local copy so the override below does not modify the shared defaultHwInfo.
HardwareInfo hwInfo = *defaultHwInfo;
// setupBlitter is the fixture's template parameter; when true, blitter support is forced on.
if (setupBlitter) {
hwInfo.capabilityTable.blitterOperationsSupported = true;
}
environmentWrapper.setCsrType<CsrType>();
// Presumably the pre-change line: builds the device from the unmodified defaultHwInfo.
device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
// Builds the device from the local copy so the blitter override above is passed along.
// NOTE(review): hwInfo is a stack local; assumes the callee copies it rather than keeping
// the pointer - confirm.
device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
context = std::make_unique<MockContext>(device.get());
}
@ -29,4 +36,7 @@ struct DispatchFlagsTests : public ::testing::Test {
std::unique_ptr<MockContext> context;
DebugManagerStateRestore restore;
};
using DispatchFlagsTests = DispatchFlagsTestsBase<false>;
using DispatchFlagsBlitTests = DispatchFlagsTestsBase<true>;
} // namespace NEO

View File

@ -176,7 +176,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
DebugManager.flags.EnableBlitterForEnqueueOperations.set(0);
mockCmdQueue->clearBcsEngines();
mockCmdQueue->bcsState.engineType = aub_stream::EngineType::NUM_ENGINES;
mockCmdQueue->clearBcsStates();
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
@ -192,7 +192,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
mockCmdQueue->clearBcsEngines();
mockCmdQueue->bcsState.engineType = aub_stream::EngineType::NUM_ENGINES;
mockCmdQueue->clearBcsStates();
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
@ -208,7 +208,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
DebugManager.flags.EnableBlitterForEnqueueOperations.set(0);
mockCmdQueue->bcsEngines[0] = bcsEngine;
mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType();
mockCmdQueue->clearBcsStates();
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
@ -225,7 +225,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
DebugManager.flags.EnableBlitterForEnqueueOperations.set(-1);
mockCmdQueue->bcsEngines[0] = bcsEngine;
mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType();
mockCmdQueue->clearBcsStates();
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
@ -244,7 +244,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
mockCmdQueue->bcsEngines[0] = bcsEngine;
mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType();
mockCmdQueue->clearBcsStates();
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(8u, bcsCsr->blitBufferCalled);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);

View File

@ -215,7 +215,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
public:
using BaseClass::bcsEngines;
using BaseClass::bcsState;
using BaseClass::bcsStates;
using BaseClass::blitEnqueueAllowed;
using BaseClass::commandQueueProperties;
using BaseClass::commandStream;
@ -229,6 +229,11 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
using BaseClass::throttle;
using BaseClass::timestampPacketContainer;
// Test helper: overwrites every entry of bcsStates with a value-initialized CopyEngineState.
void clearBcsStates() {
    for (auto &engineState : bcsStates) {
        engineState = CopyEngineState{};
    }
}
MockCommandQueueHw(Context *context,
ClDevice *device,
cl_queue_properties *properties) : BaseClass(context, device, properties, false) {