Add reference-counting logic for Events in multi-threaded scenarios

- Increment the internal refCount of wait-list events when an enqueue is blocked, and decrement it after flushing

Change-Id: I9e8f8d226897124a7e51f2473939d53868bef7a2
This commit is contained in:
Stefanowski, Adam
2018-12-04 14:18:17 +01:00
committed by sys_ocldev
parent 0c2dc1b438
commit 1001f76085
4 changed files with 38 additions and 3 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -697,6 +697,10 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
(uint32_t)multiDispatchInfo.size());
if (timestampPacketContainer.get()) {
for (cl_uint i = 0; i < eventsRequest.numEventsInWaitList; i++) {
auto event = castToObjectOrAbort<Event>(eventsRequest.eventWaitList[i]);
event->incRefInternal();
}
cmd->setTimestampPacketNode(*timestampPacketContainer, *previousTimestampPacketNodes);
}
cmd->setEventsRequest(eventsRequest);

View File

@ -111,6 +111,14 @@ CommandComputeKernel::~CommandComputeKernel() {
kernelOperation->doNotFreeISH = true;
}
kernel->decRefInternal();
auto &commandStreamReceiver = commandQueue.getCommandStreamReceiver();
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
for (cl_event eventFromWaitList : eventsWaitlist) {
auto event = castToObjectOrAbort<Event>(eventFromWaitList);
event->decRefInternal();
}
}
}
CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminated) {
@ -220,6 +228,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
taskLevel,
dispatchFlags,
commandQueue.getDevice());
commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
if (printfHandler) {
printfHandler.get()->printEnqueueOutput();

View File

@ -110,11 +110,16 @@ TEST(CommandTest, givenWaitlistRequestWhenCommandComputeKernelIsCreatedThenMakeL
std::vector<Surface *> surfaces;
auto kernelOperation = new KernelOperation(std::unique_ptr<LinearStream>(cmdStream), UniqueIH(ih1), UniqueIH(ih2), UniqueIH(ih3),
*device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage());
MockCommandComputeKernel command(cmdQ, kernelOperation, surfaces, kernel);
UserEvent event1, event2, event3;
cl_event waitlist[] = {&event1, &event2};
EventsRequest eventsRequest(2, waitlist, nullptr);
MockCommandComputeKernel command(cmdQ, kernelOperation, surfaces, kernel);
event1.incRefInternal();
event2.incRefInternal();
command.setEventsRequest(eventsRequest);
waitlist[1] = &event3;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018 Intel Corporation
* Copyright (C) 2018-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -812,6 +812,23 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWhenEnqueueingBlockedThenMake
EXPECT_TRUE(csr.isMadeResident(timestampPacketNode->getGraphicsAllocation()));
}
// Checks the internal reference count of a wait-list event around an enqueue
// issued with timestamp packet writes enabled:
//   1. enqueueKernel() with the user event in its wait list must raise the
//      event's internal reference count by exactly one;
//   2. after the user event is set to CL_COMPLETE, the event's internal
//      reference count must be back at its pre-enqueue value.
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlockedThenVirtualEventIncrementsRefInternalAndDecrementsAfterCompleteEvent) {
    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
    csr.timestampPacketWriteEnabled = true;

    MockKernelWithInternals mockKernelWithInternals(*device, context.get());
    auto mockKernel = mockKernelWithInternals.mockKernel;
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);

    UserEvent userEvent;
    cl_event waitlist = &userEvent;

    // Baseline captured before the enqueue so both expectations are relative
    // to whatever count a freshly constructed UserEvent starts with.
    const auto internalCount = userEvent.getRefInternalCount();

    cmdQ->enqueueKernel(mockKernel, 1, nullptr, gws, nullptr, 1, &waitlist, nullptr);
    EXPECT_EQ(internalCount + 1, userEvent.getRefInternalCount());

    userEvent.setStatus(CL_COMPLETE);
    // The baseline belongs to userEvent, so the decrement has to be verified on
    // userEvent itself. Comparing it with the kernel's internal count (as this
    // test did before) checks an unrelated object and would not detect a
    // reference that is never released.
    EXPECT_EQ(internalCount, userEvent.getRefInternalCount());
}
TEST_F(TimestampPacketTests, givenDispatchSizeWhenAskingForNewTimestampsThenObtainEnoughTags) {
size_t dispatchSize = 3;