Add capability to csr to allow N:1 aggregation when ooq is created.

- This allows applications to force the N:1 aggregation by creating out
of order queue.
- That switches csr to N:1 submission model where commands from multiple
command streams may be aggregated.
- That forces scenarios returning an event to be aggregated as well.

Change-Id: I8fd8d7f88bb2665234ee90870133120b206710a8
This commit is contained in:
Mrozek, Michal 2018-04-26 11:43:47 +02:00 committed by sys_ocldev
parent 82c9acddde
commit 34ff5852eb
5 changed files with 46 additions and 3 deletions

View File

@ -66,6 +66,7 @@ class CommandQueueHw : public CommandQueue {
if (getCmdQueueProperties<cl_queue_properties>(properties, CL_QUEUE_PROPERTIES) & static_cast<cl_queue_properties>(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
device->getCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch);
device->getCommandStreamReceiver().enableNTo1SubmissionModel();
}
}

View File

@ -556,7 +556,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
dispatchFlags.implicitFlush = implicitFlush;
dispatchFlags.flushStampReference = this->flushStamp->getStampReference();
dispatchFlags.preemptionMode = PreemptionHelper::taskPreemptionMode(*device, multiDispatchInfo);
dispatchFlags.outOfOrderExecutionAllowed = !eventBuilder.getEvent() || this->isOOQEnabled();
dispatchFlags.outOfOrderExecutionAllowed = !eventBuilder.getEvent() || commandStreamReceiver.isNTo1SubmissionModelEnabled();
DEBUG_BREAK_IF(taskLevel >= Event::eventNotReady);

View File

@ -104,6 +104,8 @@ class CommandStreamReceiver {
uint32_t peekLatestFlushedTaskCount() const { return latestFlushedTaskCount; }
void enableNTo1SubmissionModel() { this->nTo1SubmissionModelEnabled = true; }
bool isNTo1SubmissionModelEnabled() const { return this->nTo1SubmissionModelEnabled; }
void overrideDispatchPolicy(DispatchMode overrideValue) { this->dispatchMode = overrideValue; }
virtual void overrideMediaVFEStateDirty(bool dirty) { mediaVfeStateDirty = dirty; }
@ -173,6 +175,7 @@ class CommandStreamReceiver {
std::unique_ptr<OSInterface> osInterface;
std::unique_ptr<SubmissionAggregator> submissionAggregator;
bool nTo1SubmissionModelEnabled = false;
DispatchMode dispatchMode = DispatchMode::ImmediateDispatch;
bool disableL3Cache = false;
uint32_t requiredScratchSize = 0;

View File

@ -84,4 +84,16 @@ HWTEST_F(clCreateCommandQueueTest, givenOoqParametersWhenQueueIsCreatedThenComma
EXPECT_EQ(DispatchMode::BatchedDispatch, csr.dispatchMode);
retVal = clReleaseCommandQueue(cmdq);
}
HWTEST_F(clCreateCommandQueueTest, givenOoqParametersWhenQueueIsCreatedThenCommandStreamReceiverSwitchesToNTo1SubmissionModel) {
cl_int retVal = CL_SUCCESS;
cl_queue_properties ooq = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
auto &csr = reinterpret_cast<UltCommandStreamReceiver<FamilyType> &>(pContext->getDevice(0)->getCommandStreamReceiver());
EXPECT_FALSE(csr.isNTo1SubmissionModelEnabled());
auto cmdq = clCreateCommandQueue(pContext, devices[0], ooq, &retVal);
EXPECT_TRUE(csr.isNTo1SubmissionModelEnabled());
retVal = clReleaseCommandQueue(cmdq);
}
} // namespace ULT

View File

@ -1469,10 +1469,34 @@ HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelReturningEv
clReleaseEvent(event);
}
HWTEST_F(EnqueueKernelTest, givenOutOfOrderCommandQueueWhenEnqueueKernelReturningEventIsMadeThenPipeControlPositionIsRecorded) {
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelReturningEventIsMadeAndCommandStreamReceiverIsInNTo1ModeThenPipeControlPositionIsRecorded) {
const cl_queue_properties props[] = {0};
auto inOrderQueue = clCreateCommandQueueWithProperties(context, pDevice, props, nullptr);
auto mockCsr = new MockCsrHw2<FamilyType>(pDevice->getHardwareInfo());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
pDevice->resetCommandStreamReceiver(mockCsr);
mockCsr->enableNTo1SubmissionModel();
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);
MockKernelWithInternals mockKernel(*pDevice);
size_t gws[3] = {1, 0, 0};
cl_event event;
clEnqueueNDRangeKernel(inOrderQueue, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event);
EXPECT_FALSE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty());
auto cmdBuffer = mockedSubmissionsAggregator->peekCmdBufferList().peekHead();
EXPECT_NE(nullptr, cmdBuffer->pipeControlThatMayBeErasedLocation);
EXPECT_NE(nullptr, cmdBuffer->epiloguePipeControlLocation);
clReleaseCommandQueue(inOrderQueue);
clReleaseEvent(event);
}
HWTEST_F(EnqueueKernelTest, givenOutOfOrderCommandQueueWhenEnqueueKernelReturningEventIsMadeThenPipeControlPositionIsRecorded) {
auto mockCsr = new MockCsrHw2<FamilyType>(pDevice->getHardwareInfo());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
pDevice->resetCommandStreamReceiver(mockCsr);
@ -1480,6 +1504,9 @@ HWTEST_F(EnqueueKernelTest, givenOutOfOrderCommandQueueWhenEnqueueKernelReturnin
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();
mockCsr->overrideSubmissionAggregator(mockedSubmissionsAggregator);
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
auto inOrderQueue = clCreateCommandQueueWithProperties(context, pDevice, props, nullptr);
MockKernelWithInternals mockKernel(*pDevice);
size_t gws[3] = {1, 0, 0};
cl_event event;