mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Track separate task count for each BCS in OpenCL CommandQueue
Related-To: NEO-6057
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
852b2f6e6e
commit
19a6c9b1d3
@ -87,7 +87,6 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr
|
||||
auto &selectorCopyEngine = neoDevice.getSelectorCopyEngine();
|
||||
auto bcsEngineType = EngineHelpers::getBcsEngineType(hwInfo, device->getDeviceBitfield(), selectorCopyEngine, internalUsage);
|
||||
bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)] = neoDevice.tryGetEngine(bcsEngineType, EngineUsage::Regular);
|
||||
bcsState.engineType = bcsEngineType;
|
||||
}
|
||||
}
|
||||
|
||||
@ -630,13 +629,15 @@ cl_uint CommandQueue::getQueueFamilyIndex() const {
|
||||
}
|
||||
|
||||
// Stores the most recent task count this queue submitted to one copy engine (BCS).
//
// Every copy engine owns a separate slot in bcsStates, picked with
// EngineHelpers::getBcsIndex(), so a submission to one engine does not
// overwrite the task count tracked for another.
//
// bcsEngineType   - engine the task count belongs to
// newBcsTaskCount - value to remember for that engine
void CommandQueue::updateBcsTaskCount(aub_stream::EngineType bcsEngineType, uint32_t newBcsTaskCount) {
    CopyEngineState &state = bcsStates[EngineHelpers::getBcsIndex(bcsEngineType)];
    // The engine type is written together with the count; presumably this is
    // what makes state.isValid() report the slot as in use - confirm against
    // the CopyEngineState definition.
    state.engineType = bcsEngineType;
    state.taskCount = newBcsTaskCount;
}

// Returns the task count last stored by updateBcsTaskCount() for the given
// copy engine, read from that engine's own slot in bcsStates.
//
// Asking about an engine whose slot is not valid is treated as a programming
// error and trips DEBUG_BREAK_IF; the slot's taskCount is returned regardless.
uint32_t CommandQueue::peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const {
    const CopyEngineState &state = bcsStates[EngineHelpers::getBcsIndex(bcsEngineType)];
    DEBUG_BREAK_IF(!state.isValid());
    return state.taskCount;
}
|
||||
|
||||
IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize) {
|
||||
@ -887,7 +888,6 @@ void CommandQueue::overrideEngine(aub_stream::EngineType engineType, EngineUsage
|
||||
if (isEngineCopyOnly) {
|
||||
std::fill(bcsEngines.begin(), bcsEngines.end(), nullptr);
|
||||
bcsEngines[EngineHelpers::getBcsIndex(engineType)] = &device->getEngine(engineType, EngineUsage::Regular);
|
||||
bcsState.engineType = engineType;
|
||||
timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
|
||||
deferredTimestampPackets = std::make_unique<TimestampPacketContainer>();
|
||||
isCopyOnly = true;
|
||||
@ -927,8 +927,13 @@ void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHan
|
||||
deferredTimestampPackets->swapNodes(nodesToRelease);
|
||||
}
|
||||
|
||||
Range<CopyEngineState> states{&bcsState, bcsState.isValid() ? 1u : 0u};
|
||||
waitUntilComplete(taskCount, states, flushStamp->peekStamp(), false);
|
||||
StackVec<CopyEngineState, bcsInfoMaskSize> activeBcsStates{};
|
||||
for (CopyEngineState &state : this->bcsStates) {
|
||||
if (state.isValid()) {
|
||||
activeBcsStates.push_back(state);
|
||||
}
|
||||
}
|
||||
waitUntilComplete(taskCount, activeBcsStates, flushStamp->peekStamp(), false);
|
||||
|
||||
if (printfHandler) {
|
||||
printfHandler->printEnqueueOutput();
|
||||
|
@ -380,7 +380,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
QueueThrottle throttle = QueueThrottle::MEDIUM;
|
||||
EnqueueProperties::Operation latestSentEnqueueType = EnqueueProperties::Operation::None;
|
||||
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
|
||||
CopyEngineState bcsState = {};
|
||||
std::array<CopyEngineState, bcsInfoMaskSize> bcsStates = {};
|
||||
|
||||
bool perfCountersEnabled = false;
|
||||
|
||||
|
@ -261,17 +261,17 @@ HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectThrot
|
||||
EXPECT_EQ(mockCmdQ->throttle, mockCsr->passedDispatchFlags.throttle);
|
||||
}
|
||||
|
||||
HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKernelThenDoImplicitFlush) {
|
||||
HWTEST_F(DispatchFlagsBlitTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKernelThenDoImplicitFlush) {
|
||||
using CsrType = MockCsrHw2<FamilyType>;
|
||||
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1);
|
||||
DebugManager.flags.EnableTimestampPacket.set(1);
|
||||
|
||||
SetUpImpl<CsrType>();
|
||||
REQUIRE_BLITTER_OR_SKIP(&device->getHardwareInfo());
|
||||
|
||||
auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
|
||||
auto mockCsr = static_cast<CsrType *>(&mockCmdQ->getGpgpuCommandStreamReceiver());
|
||||
mockCsr->skipBlitCalls = true;
|
||||
mockCmdQ->clearBcsEngines();
|
||||
mockCmdQ->bcsEngines[0] = mockCmdQ->gpgpuEngine;
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto buffer = std::unique_ptr<Buffer>(Buffer::create(context.get(), 0, 1, nullptr, retVal));
|
||||
auto &bcsCsr = *mockCmdQ->bcsEngines[0]->commandStreamReceiver;
|
||||
@ -306,18 +306,17 @@ HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKerne
|
||||
EXPECT_EQ(GrfConfig::NotApplicable, mockCsr->passedDispatchFlags.numGrfRequired);
|
||||
}
|
||||
|
||||
HWTEST_F(DispatchFlagsTests, givenN1EnabledWhenDispatchingWithoutKernelThenAllowOutOfOrderExecution) {
|
||||
HWTEST_F(DispatchFlagsBlitTests, givenN1EnabledWhenDispatchingWithoutKernelThenAllowOutOfOrderExecution) {
|
||||
using CsrType = MockCsrHw2<FamilyType>;
|
||||
DebugManager.flags.EnableTimestampPacket.set(1);
|
||||
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(1);
|
||||
|
||||
SetUpImpl<CsrType>();
|
||||
REQUIRE_BLITTER_OR_SKIP(&device->getHardwareInfo());
|
||||
|
||||
auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
|
||||
auto mockCsr = static_cast<CsrType *>(&mockCmdQ->getGpgpuCommandStreamReceiver());
|
||||
mockCsr->skipBlitCalls = true;
|
||||
mockCmdQ->clearBcsEngines();
|
||||
mockCmdQ->bcsEngines[0] = mockCmdQ->gpgpuEngine;
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto buffer = std::unique_ptr<Buffer>(Buffer::create(context.get(), 0, 1, nullptr, retVal));
|
||||
auto &bcsCsr = *mockCmdQ->bcsEngines[0]->commandStreamReceiver;
|
||||
|
@ -932,13 +932,11 @@ HWTEST_F(EnqueueAuxKernelTests, givenDebugVariableDisablingBuiltinTranslationWhe
|
||||
pDevice->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
|
||||
auto hwInfo = pDevice->getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]->getMutableHardwareInfo();
|
||||
hwInfo->capabilityTable.blitterOperationsSupported = true;
|
||||
REQUIRE_BLITTER_OR_SKIP(hwInfo);
|
||||
|
||||
MockKernelWithInternals mockKernel(*pClDevice, context);
|
||||
MyCmdQ<FamilyType> cmdQ(context, pClDevice);
|
||||
cmdQ.clearBcsEngines();
|
||||
cmdQ.bcsEngines[0] = cmdQ.gpgpuEngine;
|
||||
|
||||
hwInfo->capabilityTable.blitterOperationsSupported = true;
|
||||
|
||||
size_t gws[3] = {1, 0, 0};
|
||||
MockBuffer buffer;
|
||||
@ -1045,4 +1043,4 @@ HWTEST_F(EnqueueKernelTest, givenTimestampWriteEnableWhenMarkerProfilingWithWait
|
||||
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, true);
|
||||
|
||||
EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO<FamilyType>::size, extendedCommandStreamSize);
|
||||
}
|
||||
}
|
||||
|
@ -11,16 +11,23 @@
|
||||
#include "shared/test/common/mocks/mock_csr.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||
#include "shared/test/common/test_macros/test_checks_shared.h"
|
||||
|
||||
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
|
||||
namespace NEO {
|
||||
struct DispatchFlagsTests : public ::testing::Test {
|
||||
template <bool setupBlitter>
|
||||
struct DispatchFlagsTestsBase : public ::testing::Test {
|
||||
template <typename CsrType>
|
||||
void SetUpImpl() {
|
||||
HardwareInfo hwInfo = *defaultHwInfo;
|
||||
if (setupBlitter) {
|
||||
hwInfo.capabilityTable.blitterOperationsSupported = true;
|
||||
}
|
||||
|
||||
environmentWrapper.setCsrType<CsrType>();
|
||||
device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
|
||||
device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
|
||||
context = std::make_unique<MockContext>(device.get());
|
||||
}
|
||||
|
||||
@ -29,4 +36,7 @@ struct DispatchFlagsTests : public ::testing::Test {
|
||||
std::unique_ptr<MockContext> context;
|
||||
DebugManagerStateRestore restore;
|
||||
};
|
||||
|
||||
using DispatchFlagsTests = DispatchFlagsTestsBase<false>;
|
||||
using DispatchFlagsBlitTests = DispatchFlagsTestsBase<true>;
|
||||
} // namespace NEO
|
||||
|
@ -176,7 +176,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
|
||||
|
||||
DebugManager.flags.EnableBlitterForEnqueueOperations.set(0);
|
||||
mockCmdQueue->clearBcsEngines();
|
||||
mockCmdQueue->bcsState.engineType = aub_stream::EngineType::NUM_ENGINES;
|
||||
mockCmdQueue->clearBcsStates();
|
||||
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
|
||||
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
|
||||
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
|
||||
@ -192,7 +192,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
|
||||
|
||||
DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
|
||||
mockCmdQueue->clearBcsEngines();
|
||||
mockCmdQueue->bcsState.engineType = aub_stream::EngineType::NUM_ENGINES;
|
||||
mockCmdQueue->clearBcsStates();
|
||||
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
|
||||
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
|
||||
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
|
||||
@ -208,7 +208,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
|
||||
|
||||
DebugManager.flags.EnableBlitterForEnqueueOperations.set(0);
|
||||
mockCmdQueue->bcsEngines[0] = bcsEngine;
|
||||
mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType();
|
||||
mockCmdQueue->clearBcsStates();
|
||||
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
|
||||
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
|
||||
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
|
||||
@ -225,7 +225,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
|
||||
|
||||
DebugManager.flags.EnableBlitterForEnqueueOperations.set(-1);
|
||||
mockCmdQueue->bcsEngines[0] = bcsEngine;
|
||||
mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType();
|
||||
mockCmdQueue->clearBcsStates();
|
||||
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
|
||||
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
|
||||
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
|
||||
@ -244,7 +244,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
|
||||
|
||||
DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
|
||||
mockCmdQueue->bcsEngines[0] = bcsEngine;
|
||||
mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType();
|
||||
mockCmdQueue->clearBcsStates();
|
||||
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(8u, bcsCsr->blitBufferCalled);
|
||||
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
|
||||
|
@ -215,7 +215,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
|
||||
public:
|
||||
using BaseClass::bcsEngines;
|
||||
using BaseClass::bcsState;
|
||||
using BaseClass::bcsStates;
|
||||
using BaseClass::blitEnqueueAllowed;
|
||||
using BaseClass::commandQueueProperties;
|
||||
using BaseClass::commandStream;
|
||||
@ -229,6 +229,11 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
using BaseClass::throttle;
|
||||
using BaseClass::timestampPacketContainer;
|
||||
|
||||
// Mock-only helper: overwrites every entry of bcsStates with a
// default-constructed CopyEngineState.
void clearBcsStates() {
    bcsStates.fill(CopyEngineState{});
}
|
||||
|
||||
MockCommandQueueHw(Context *context,
|
||||
ClDevice *device,
|
||||
cl_queue_properties *properties) : BaseClass(context, device, properties, false) {
|
||||
|
Reference in New Issue
Block a user