Optimize BCS flushing scheme [2/n]

Change-Id: I6f1e0115b9c45f89afb86f8fd2304604243541df
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2020-06-25 11:35:29 +02:00
committed by sys_ocldev
parent d4b12c97d4
commit 86dc5bacc7
19 changed files with 291 additions and 134 deletions

View File

@ -147,28 +147,28 @@ bool CommandQueue::isCompleted(uint32_t taskCount) const {
return tag >= taskCount;
}
void CommandQueue::waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
WAIT_ENTER()
DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", taskCountToWait);
DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", gpgpuTaskCountToWait);
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", getHwTag());
bool forcePowerSavingMode = this->throttle == QueueThrottle::LOW;
getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait,
getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait, flushStampToWait,
useQuickKmdSleep, forcePowerSavingMode);
DEBUG_BREAK_IF(getHwTag() < taskCountToWait);
DEBUG_BREAK_IF(getHwTag() < gpgpuTaskCountToWait);
if (gtpinIsGTPinInitialized()) {
gtpinNotifyTaskCompletion(taskCountToWait);
gtpinNotifyTaskCompletion(gpgpuTaskCountToWait);
}
if (auto bcsCsr = getBcsCommandStreamReceiver()) {
bcsCsr->waitForTaskCountWithKmdNotifyFallback(bcsTaskCount, 0, false, false);
bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(bcsTaskCount);
bcsCsr->waitForTaskCountWithKmdNotifyFallback(bcsTaskCountToWait, 0, false, false);
bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(bcsTaskCountToWait);
}
getGpgpuCommandStreamReceiver().waitForTaskCountAndCleanTemporaryAllocationList(taskCountToWait);
getGpgpuCommandStreamReceiver().waitForTaskCountAndCleanTemporaryAllocationList(gpgpuTaskCountToWait);
WAIT_LEAVE()
}

View File

@ -215,7 +215,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
MOCKABLE_VIRTUAL bool isQueueBlocked();
MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep);
MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep);
static uint32_t getTaskLevelFromWaitList(uint32_t taskLevel,
cl_uint numEventsInWaitList,
@ -299,6 +299,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
}
void updateBcsTaskCount(uint32_t newBcsTaskCount) { this->bcsTaskCount = newBcsTaskCount; }
uint32_t peekBcsTaskCount() const { return bcsTaskCount; }
// taskCount of last task
uint32_t taskCount = 0;

View File

@ -145,7 +145,7 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie
if (outEventObj) {
outEventObj->setEndTimeStamp();
outEventObj->updateTaskCount(this->taskCount);
outEventObj->updateTaskCount(this->taskCount, this->bcsTaskCount);
outEventObj->flushStamp->replaceStampObject(this->flushStamp->getStampReference());
if (eventCompleted) {
outEventObj->setStatus(CL_COMPLETE);

View File

@ -305,7 +305,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
getGpgpuCommandStreamReceiver().setMediaVFEStateDirty(true);
if (devQueueHw->getSchedulerReturnInstance() > 0) {
waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
waitUntilComplete(completionStamp.taskCount, bcsTaskCount, completionStamp.flushStamp, false);
this->runSchedulerSimulation(*devQueueHw, *parentKernel);
}
}
@ -353,7 +353,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
updateFromCompletionStamp(completionStamp);
if (eventBuilder.getEvent()) {
eventBuilder.getEvent()->updateCompletionStamp(completionStamp.taskCount, completionStamp.taskLevel, completionStamp.flushStamp);
eventBuilder.getEvent()->updateCompletionStamp(completionStamp.taskCount, bcsTaskCount, completionStamp.taskLevel, completionStamp.flushStamp);
FileLoggerInstance().log(DebugManager.flags.EventsDebugEnable.get(), "updateCompletionStamp Event", eventBuilder.getEvent(), "taskLevel", eventBuilder.getEvent()->taskLevel.load());
}
@ -382,9 +382,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
if (blockQueue) {
while (isQueueBlocked()) {
}
waitUntilComplete(taskCount, flushStamp->peekStamp(), false);
waitUntilComplete(taskCount, bcsTaskCount, flushStamp->peekStamp(), false);
} else {
waitUntilComplete(taskCount, flushStamp->peekStamp(), false);
waitUntilComplete(taskCount, bcsTaskCount, flushStamp->peekStamp(), false);
if (printfHandler) {
printfHandler->printEnqueueOutput();
}

View File

@ -27,7 +27,7 @@ cl_int CommandQueueHw<GfxFamily>::finish() {
auto flushStampToWaitFor = this->flushStamp->peekStamp();
// Stall until HW reaches CQ taskCount
waitUntilComplete(taskCountToWaitFor, flushStampToWaitFor, false);
waitUntilComplete(taskCountToWaitFor, this->bcsTaskCount, flushStampToWaitFor, false);
return CL_SUCCESS;
}

View File

@ -224,8 +224,9 @@ uint32_t Event::getCompletionStamp() const {
return this->taskCount;
}
void Event::updateCompletionStamp(uint32_t taskCount, uint32_t tasklevel, FlushStamp flushStamp) {
this->taskCount = taskCount;
void Event::updateCompletionStamp(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount, uint32_t tasklevel, FlushStamp flushStamp) {
this->taskCount = gpgpuTaskCount;
this->bcsTaskCount = bcsTaskCount;
this->taskLevel = tasklevel;
this->flushStamp->setStamp(flushStamp);
}
@ -370,7 +371,7 @@ inline bool Event::wait(bool blocking, bool useQuickKmdSleep) {
}
}
cmdQueue->waitUntilComplete(taskCount.load(), flushStamp->peekStamp(), useQuickKmdSleep);
cmdQueue->waitUntilComplete(taskCount.load(), this->bcsTaskCount, flushStamp->peekStamp(), useQuickKmdSleep);
updateExecutionStatus();
DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0);
@ -510,11 +511,9 @@ void Event::transitionExecutionStatus(int32_t newExecutionStatus) const {
void Event::submitCommand(bool abortTasks) {
std::unique_ptr<Command> cmdToProcess(cmdToSubmit.exchange(nullptr));
if (cmdToProcess.get() != nullptr) {
std::unique_lock<CommandStreamReceiver::MutexType> lockCSR;
if (this->cmdQueue) {
lockCSR = this->getCommandQueue()->getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
}
if ((this->isProfilingEnabled()) && (this->cmdQueue != nullptr)) {
auto lockCSR = getCommandQueue()->getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
if (this->isProfilingEnabled()) {
if (timeStampNode) {
this->cmdQueue->getGpgpuCommandStreamReceiver().makeResident(*timeStampNode->getBaseGraphicsAllocation());
cmdToProcess->timestamp = timeStampNode;
@ -530,10 +529,10 @@ void Event::submitCommand(bool abortTasks) {
}
}
auto &complStamp = cmdToProcess->submit(taskLevel, abortTasks);
if (profilingCpuPath && this->isProfilingEnabled() && (this->cmdQueue != nullptr)) {
if (profilingCpuPath && this->isProfilingEnabled()) {
setEndTimeStamp();
}
updateTaskCount(complStamp.taskCount);
updateTaskCount(complStamp.taskCount, cmdQueue->peekBcsTaskCount());
flushStamp->setStamp(complStamp.flushStamp);
submittedCmd.exchange(cmdToProcess.release());
} else if (profilingCpuPath && endTimeStamp == 0) {
@ -543,7 +542,7 @@ void Event::submitCommand(bool abortTasks) {
if (!this->isUserEvent() && this->eventWithoutCommand) {
if (this->cmdQueue) {
auto lockCSR = this->getCommandQueue()->getGpgpuCommandStreamReceiver().obtainUniqueOwnership();
updateTaskCount(this->cmdQueue->getGpgpuCommandStreamReceiver().peekTaskCount());
updateTaskCount(this->cmdQueue->getGpgpuCommandStreamReceiver().peekTaskCount(), cmdQueue->peekBcsTaskCount());
}
}
//make sure that task count is synchronized for events with kernels

View File

@ -89,7 +89,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
~Event() override;
uint32_t getCompletionStamp(void) const;
void updateCompletionStamp(uint32_t taskCount, uint32_t tasklevel, FlushStamp flushStamp);
void updateCompletionStamp(uint32_t taskCount, uint32_t bcsTaskCount, uint32_t tasklevel, FlushStamp flushStamp);
cl_ulong getDelta(cl_ulong startTime,
cl_ulong endTime);
void setCPUProfilingPath(bool isCPUPath) { this->profilingCpuPath = isCPUPath; }
@ -243,14 +243,15 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
virtual void unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus);
void updateTaskCount(uint32_t taskCount) {
if (taskCount == CompletionStamp::notReady) {
void updateTaskCount(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount) {
if (gpgpuTaskCount == CompletionStamp::notReady) {
DEBUG_BREAK_IF(true);
return;
}
uint32_t prevTaskCount = this->taskCount.exchange(taskCount);
if ((prevTaskCount != CompletionStamp::notReady) && (prevTaskCount > taskCount)) {
this->bcsTaskCount = bcsTaskCount;
uint32_t prevTaskCount = this->taskCount.exchange(gpgpuTaskCount);
if ((prevTaskCount != CompletionStamp::notReady) && (prevTaskCount > gpgpuTaskCount)) {
this->taskCount = prevTaskCount;
DEBUG_BREAK_IF(true);
}
@ -363,6 +364,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
uint64_t startTimeStamp;
uint64_t endTimeStamp;
uint64_t completeTimeStamp;
uint32_t bcsTaskCount = 0;
bool perfCountersEnabled;
TagNode<HwTimeStamps> *timeStampNode = nullptr;
TagNode<HwPerfCounter> *perfCounterNode = nullptr;

View File

@ -88,7 +88,7 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
commandQueue.getDevice());
if (!memObj.isMemObjZeroCopy()) {
commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
commandQueue.waitUntilComplete(completionStamp.taskCount, commandQueue.peekBcsTaskCount(), completionStamp.flushStamp, false);
if (operationType == MAP) {
memObj.transferDataToHostPtr(copySize, copyOffset);
} else if (!readOnly) {
@ -268,7 +268,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
}
if (printfHandler) {
commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
commandQueue.waitUntilComplete(completionStamp.taskCount, commandQueue.peekBcsTaskCount(), completionStamp.flushStamp, false);
printfHandler.get()->printEnqueueOutput();
}

View File

@ -1025,6 +1025,132 @@ HWTEST_TEMPLATED_F(BlitEnqueueFlushTests, givenDebugFlagSetWhenCheckingBcsCacheF
EXPECT_TRUE(mockCommandQueue->isCacheFlushForBcsRequired());
}
using BlitEnqueueTaskCountTests = BlitEnqueueTests<1>;

// Checks that waitUntilComplete() hands each task count to its own command stream
// receiver: the GPGPU count to the GPGPU CSR and the BCS count to the BCS CSR.
HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, whenWaitUntilCompletionCalledThenWaitForSpecificBcsTaskCount) {
    // The two counts are deliberately different. With equal values the test would
    // still pass if the arguments were swapped or if one count was used for both CSRs.
    uint32_t gpgpuTaskCount = 123;
    uint32_t bcsTaskCount = 456;

    commandQueue->waitUntilComplete(gpgpuTaskCount, bcsTaskCount, 0, false);

    EXPECT_EQ(gpgpuTaskCount, static_cast<UltCommandStreamReceiver<FamilyType> *>(gpgpuCsr)->latestWaitForCompletionWithTimeoutTaskCount.load());
    EXPECT_EQ(bcsTaskCount, static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr)->latestWaitForCompletionWithTimeoutTaskCount.load());
}
// Two non-blocking buffer writes each return an event. Waiting on an event is expected
// to wait for the task counts recorded in that event (2/2 for the second write, 1/1 for
// the first), regardless of the order in which the events are waited on.
HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenEventWhenWaitingForCompletionThenWaitForCurrentBcsTaskCount) {
    auto *gpgpuUltCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(gpgpuCsr);
    auto *bcsUltCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr);

    // Compares the task counts most recently waited for on both CSRs.
    auto expectLatestWaitedTaskCounts = [&](uint32_t expectedGpgpuTaskCount, uint32_t expectedBcsTaskCount) {
        EXPECT_EQ(expectedGpgpuTaskCount, gpgpuUltCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
        EXPECT_EQ(expectedBcsTaskCount, bcsUltCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
    };

    auto buffer = createBuffer(1, false);
    // NOTE(review): presumably keeps the write off the CPU-copy path - confirm against Buffer.
    buffer->forceDisallowCPUCopy = true;
    int hostPtr = 0;

    cl_event outEvent1, outEvent2;
    commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &outEvent1);
    commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &outEvent2);

    // Wait on the newer event first, then on the older one.
    clWaitForEvents(1, &outEvent2);
    expectLatestWaitedTaskCounts(2u, 2u);

    clWaitForEvents(1, &outEvent1);
    expectLatestWaitedTaskCounts(1u, 1u);

    clReleaseEvent(outEvent1);
    clReleaseEvent(outEvent2);
}
// Same scenario as the unblocked variant, but the first write is gated by a user event
// and the second write depends on the first. Once the user event completes, each output
// event is expected to wait for its own task counts (2/2, then 1/1).
HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenBlockedEventWhenWaitingForCompletionThenWaitForCurrentBcsTaskCount) {
    auto *gpgpuUltCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(gpgpuCsr);
    auto *bcsUltCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr);

    // Compares the task counts most recently waited for on both CSRs.
    auto expectLatestWaitedTaskCounts = [&](uint32_t expectedGpgpuTaskCount, uint32_t expectedBcsTaskCount) {
        EXPECT_EQ(expectedGpgpuTaskCount, gpgpuUltCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
        EXPECT_EQ(expectedBcsTaskCount, bcsUltCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
    };

    auto buffer = createBuffer(1, false);
    buffer->forceDisallowCPUCopy = true;
    int hostPtr = 0;

    cl_event outEvent1, outEvent2;
    UserEvent userEvent;
    cl_event waitlist1 = &userEvent;  // blocks the first enqueue
    cl_event *waitlist2 = &outEvent1; // chains the second enqueue to the first

    commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, &waitlist1, &outEvent1);
    commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, waitlist2, &outEvent2);

    // Completing the user event is what lets the wait calls below proceed.
    userEvent.setStatus(CL_COMPLETE);

    clWaitForEvents(1, &outEvent2);
    expectLatestWaitedTaskCounts(2u, 2u);

    clWaitForEvents(1, &outEvent1);
    expectLatestWaitedTaskCounts(1u, 1u);

    clReleaseEvent(outEvent1);
    clReleaseEvent(outEvent2);

    EXPECT_FALSE(commandQueue->isQueueBlocked());
}
// Two markers, the first blocked by a user event and the second chained to the first.
// After the user event completes, waiting on the second marker's event is expected to
// wait for GPGPU task count 1 and BCS task count 0; waiting on the first expects 0/0.
HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenBlockedEnqueueWithoutKernelWhenWaitingForCompletionThenWaitForCurrentBcsTaskCount) {
    auto *gpgpuUltCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(gpgpuCsr);
    auto *bcsUltCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr);

    // Compares the task counts most recently waited for on both CSRs.
    auto expectLatestWaitedTaskCounts = [&](uint32_t expectedGpgpuTaskCount, uint32_t expectedBcsTaskCount) {
        EXPECT_EQ(expectedGpgpuTaskCount, gpgpuUltCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
        EXPECT_EQ(expectedBcsTaskCount, bcsUltCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
    };

    cl_event outEvent1, outEvent2;
    UserEvent userEvent;
    cl_event waitlist1 = &userEvent;  // blocks the first marker
    cl_event *waitlist2 = &outEvent1; // chains the second marker to the first

    commandQueue->enqueueMarkerWithWaitList(1, &waitlist1, &outEvent1);
    commandQueue->enqueueMarkerWithWaitList(1, waitlist2, &outEvent2);

    userEvent.setStatus(CL_COMPLETE);

    clWaitForEvents(1, &outEvent2);
    expectLatestWaitedTaskCounts(1u, 0u);

    clWaitForEvents(1, &outEvent1);
    expectLatestWaitedTaskCounts(0u, 0u);

    clReleaseEvent(outEvent1);
    clReleaseEvent(outEvent2);

    EXPECT_FALSE(commandQueue->isQueueBlocked());
}
// With the DoCpuCopyOnWriteBuffer debug flag set, the CSR and queue task counts are
// preset to GPGPU = 1 and BCS = 2. Both write events are then expected to wait for
// exactly those counts, whichever event is waited on.
HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenEventFromCpuCopyWhenWaitingForCompletionThenWaitForCurrentBcsTaskCount) {
    DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1);

    auto buffer = createBuffer(1, false);
    int hostPtr = 0;

    auto *gpgpuUltCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(gpgpuCsr);
    auto *bcsUltCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr);

    // Compares the task counts most recently waited for on both CSRs.
    auto expectLatestWaitedTaskCounts = [&](uint32_t expectedGpgpuTaskCount, uint32_t expectedBcsTaskCount) {
        EXPECT_EQ(expectedGpgpuTaskCount, gpgpuUltCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
        EXPECT_EQ(expectedBcsTaskCount, bcsUltCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
    };

    // Give the two engines different counts so the assertions can tell them apart.
    gpgpuUltCsr->taskCount = 1;
    commandQueue->taskCount = 1;
    bcsUltCsr->taskCount = 2;
    commandQueue->updateBcsTaskCount(2);

    cl_event outEvent1, outEvent2;
    commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &outEvent1);
    commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &outEvent2);

    clWaitForEvents(1, &outEvent2);
    expectLatestWaitedTaskCounts(1u, 2u);

    clWaitForEvents(1, &outEvent1);
    expectLatestWaitedTaskCounts(1u, 2u);

    clReleaseEvent(outEvent1);
    clReleaseEvent(outEvent2);
}
using BlitEnqueueWithDisabledGpgpuSubmissionTests = BlitEnqueueTests<1>;
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenDebugFlagSetWhenDoingBcsCopyThenSubmitToGpgpuOnlyIfPreviousEnqueueWasGpgpu) {

View File

@ -329,7 +329,7 @@ HWTEST_F(CommandQueueHwTest, GivenEventsWaitlistOnBlockingMapBufferWillWaitForEv
MockEvent(Context *ctx, uint32_t updateCountBeforeCompleted)
: UserEvent(ctx),
updateCount(0), updateCountBeforeCompleted(updateCountBeforeCompleted) {
this->updateTaskCount(0);
this->updateTaskCount(0, 0);
this->taskLevel = 0;
}
@ -959,7 +959,7 @@ HWTEST_F(CommandQueueHwTest, givenEventWithRecordedCommandWhenSubmitCommandIsCal
std::thread t([&]() {
while (!go)
;
neoEvent.updateTaskCount(77u);
neoEvent.updateTaskCount(77u, 0);
});
neoEvent.submitCommand(false);

View File

@ -799,7 +799,7 @@ struct WaitForQueueCompletionTests : public ::testing::Test {
template <typename Family>
struct MyCmdQueue : public CommandQueueHw<Family> {
MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw<Family>(context, device, nullptr, false){};
void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
requestedUseQuickKmdSleep = useQuickKmdSleep;
waitUntilCompleteCounter++;
}

View File

@ -105,9 +105,9 @@ struct EnqueueHandlerWithAubSubCaptureTests : public EnqueueHandlerTest {
public:
MockCmdQWithAubSubCapture(Context *context, ClDevice *device) : CommandQueueHw<FamilyType>(context, device, nullptr, false) {}
void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
waitUntilCompleteCalled = true;
CommandQueueHw<FamilyType>::waitUntilComplete(taskCountToWait, flushStampToWait, useQuickKmdSleep);
CommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, bcsTaskCountToWait, flushStampToWait, useQuickKmdSleep);
}
void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies) override {
@ -420,10 +420,10 @@ HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenAddPatchInfoCommentsForAUBDu
}
HWTEST_F(EnqueueHandlerTest, givenExternallySynchronizedParentEventWhenRequestingEnqueueWithoutGpuSubmissionThenTaskCountIsNotInherited) {
struct ExternallySynchEvent : VirtualEvent {
ExternallySynchEvent(CommandQueue *cmdQueue) {
struct ExternallySynchEvent : UserEvent {
ExternallySynchEvent() : UserEvent() {
setStatus(CL_COMPLETE);
this->updateTaskCount(7);
this->updateTaskCount(7, 0);
}
bool isExternallySynchronized() const override {
return true;
@ -432,7 +432,7 @@ HWTEST_F(EnqueueHandlerTest, givenExternallySynchronizedParentEventWhenRequestin
auto mockCmdQ = new MockCommandQueueHw<FamilyType>(context, pClDevice, 0);
ExternallySynchEvent synchEvent(mockCmdQ);
ExternallySynchEvent synchEvent;
cl_event inEv = &synchEvent;
cl_event outEv = nullptr;

View File

@ -718,9 +718,9 @@ struct EnqueueAuxKernelTests : public EnqueueKernelTest {
auxTranslationDirection);
}
void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
waitCalled++;
CommandQueueHw<FamilyType>::waitUntilComplete(taskCountToWait, flushStampToWait, useQuickKmdSleep);
CommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, bcsTaskCountToWait, flushStampToWait, useQuickKmdSleep);
}
std::vector<AuxTranslationDirection> auxTranslationDirections;

View File

@ -7,11 +7,13 @@
#include "shared/source/helpers/timestamp_packet.h"
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "shared/test/unit_test/utilities/base_object_utils.h"
#include "opencl/source/event/async_events_handler.h"
#include "opencl/source/event/event.h"
#include "opencl/source/event/user_event.h"
#include "opencl/test/unit_test/mocks/mock_async_event_handler.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "test.h"
@ -32,7 +34,7 @@ class AsyncEventsHandlerTests : public ::testing::Test {
}
void setTaskStamp(uint32_t taskLevel, uint32_t taskCount) {
this->taskLevel.store(taskLevel);
this->updateTaskCount(taskCount);
this->updateTaskCount(taskCount, 0);
}
MOCK_METHOD2(wait, bool(bool blocking, bool quickKmdSleep));
@ -46,36 +48,34 @@ class AsyncEventsHandlerTests : public ::testing::Test {
dbgRestore.reset(new DebugManagerStateRestore());
DebugManager.flags.EnableAsyncEventsHandler.set(false);
handler.reset(new MockHandler());
context = new NiceMock<MockContext>();
context = make_releaseable<NiceMock<MockContext>>();
event1 = new NiceMock<MyEvent>(context, nullptr, CL_COMMAND_BARRIER, CompletionStamp::notReady, CompletionStamp::notReady);
event2 = new NiceMock<MyEvent>(context, nullptr, CL_COMMAND_BARRIER, CompletionStamp::notReady, CompletionStamp::notReady);
event3 = new NiceMock<MyEvent>(context, nullptr, CL_COMMAND_BARRIER, CompletionStamp::notReady, CompletionStamp::notReady);
}
commandQueue = make_releaseable<MockCommandQueue>(context.get(), context->getDevice(0), nullptr);
void TearDown() override {
context->release();
event1->release();
event2->release();
event3->release();
*(commandQueue->getGpgpuCommandStreamReceiver().getTagAddress()) = 0;
event1 = make_releaseable<NiceMock<MyEvent>>(context.get(), commandQueue.get(), CL_COMMAND_BARRIER, CompletionStamp::notReady, CompletionStamp::notReady);
event2 = make_releaseable<NiceMock<MyEvent>>(context.get(), commandQueue.get(), CL_COMMAND_BARRIER, CompletionStamp::notReady, CompletionStamp::notReady);
event3 = make_releaseable<NiceMock<MyEvent>>(context.get(), commandQueue.get(), CL_COMMAND_BARRIER, CompletionStamp::notReady, CompletionStamp::notReady);
}
std::unique_ptr<DebugManagerStateRestore> dbgRestore;
std::unique_ptr<MockHandler> handler;
int counter = 0;
NiceMock<MyEvent> *event1 = nullptr;
NiceMock<MyEvent> *event2 = nullptr;
NiceMock<MyEvent> *event3 = nullptr;
NiceMock<MockContext> *context = nullptr;
ReleaseableObjectPtr<NiceMock<MockContext>> context;
ReleaseableObjectPtr<MockCommandQueue> commandQueue;
ReleaseableObjectPtr<NiceMock<MyEvent>> event1;
ReleaseableObjectPtr<NiceMock<MyEvent>> event2;
ReleaseableObjectPtr<NiceMock<MyEvent>> event3;
};
TEST_F(AsyncEventsHandlerTests, givenEventsWhenListIsProcessedThenUpdateExecutionStatus) {
event1->setTaskStamp(0, 0);
event2->setTaskStamp(0, 0);
handler->registerEvent(event1);
handler->registerEvent(event2);
handler->registerEvent(event1.get());
handler->registerEvent(event2.get());
EXPECT_EQ(CL_QUEUED, event1->getExecutionStatus());
EXPECT_EQ(CL_QUEUED, event2->getExecutionStatus());
@ -91,7 +91,7 @@ TEST_F(AsyncEventsHandlerTests, givenEventsWhenListIsProcessedThenUpdateExecutio
TEST_F(AsyncEventsHandlerTests, WhenProcessIsCompletedThenRefInternalCountIsDecremented) {
event1->setTaskStamp(CompletionStamp::notReady, 0);
handler->registerEvent(event1);
handler->registerEvent(event1.get());
EXPECT_EQ(2, event1->getRefInternalCount());
handler->process();
EXPECT_TRUE(handler->peekIsListEmpty());
@ -103,7 +103,7 @@ TEST_F(AsyncEventsHandlerTests, givenNotCalledCallbacksWhenListIsProcessedThenDo
event1->setTaskStamp(CompletionStamp::notReady, 0);
event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &submittedCounter);
event1->addCallback(&this->callbackFcn, CL_COMPLETE, &completeCounter);
handler->registerEvent(event1);
handler->registerEvent(event1.get());
auto expect = [&](int status, int sCounter, int cCounter, bool empty) {
EXPECT_EQ(status, event1->getExecutionStatus());
@ -160,10 +160,10 @@ TEST_F(AsyncEventsHandlerTests, givenExternallSynchronizedEventWhenListIsProcess
}
TEST_F(AsyncEventsHandlerTests, givenDoubleRegisteredEventWhenListIsProcessedAndNoCallbacksToProcessThenUnregister) {
event1->setTaskStamp(CompletionStamp::notReady - 1, 0);
event1->setTaskStamp(CompletionStamp::notReady - 1, CompletionStamp::notReady + 1);
event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter);
handler->registerEvent(event1);
handler->registerEvent(event1);
handler->registerEvent(event1.get());
handler->registerEvent(event1.get());
handler->process();
EXPECT_EQ(CL_SUBMITTED, event1->getExecutionStatus());
@ -178,9 +178,9 @@ TEST_F(AsyncEventsHandlerTests, givenEventsNotHandledByHandlderWhenDestructingTh
event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter);
event2->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter);
myHandler->registerEvent(event1);
myHandler->registerEvent(event1.get());
myHandler->process();
myHandler->registerEvent(event2);
myHandler->registerEvent(event2.get());
EXPECT_FALSE(myHandler->peekIsListEmpty());
EXPECT_FALSE(myHandler->peekIsRegisterListEmpty());
@ -202,9 +202,9 @@ TEST_F(AsyncEventsHandlerTests, givenEventsNotHandledByHandlderWhenAsyncExecutio
event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter);
event2->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter);
handler->registerEvent(event1);
handler->registerEvent(event1.get());
handler->process();
handler->registerEvent(event2);
handler->registerEvent(event2.get());
EXPECT_FALSE(handler->peekIsListEmpty());
EXPECT_FALSE(handler->peekIsRegisterListEmpty());
@ -231,15 +231,15 @@ TEST_F(AsyncEventsHandlerTests, WhenHandlerIsRegisteredThenThreadIsCreated) {
event1->setTaskStamp(CompletionStamp::notReady, 0);
EXPECT_FALSE(handler->openThreadCalled);
handler->registerEvent(event1);
handler->registerEvent(event1.get());
EXPECT_TRUE(handler->openThreadCalled);
}
TEST_F(AsyncEventsHandlerTests, WhenProcessingAsynchronouslyThenBothThreadsCompelete) {
DebugManager.flags.EnableAsyncEventsHandler.set(true);
event1->setTaskStamp(CompletionStamp::notReady, 0);
event2->setTaskStamp(CompletionStamp::notReady, 0);
event1->setTaskStamp(CompletionStamp::notReady, CompletionStamp::notReady + 1);
event2->setTaskStamp(CompletionStamp::notReady, CompletionStamp::notReady + 1);
event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter);
event2->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter);
@ -309,14 +309,14 @@ TEST_F(AsyncEventsHandlerTests, givenRegistredEventsWhenProcessIsCalledThenRetur
event3->setTaskStamp(0, 3);
event2->addCallback(&this->callbackFcn, CL_COMPLETE, &event2Counter);
handler->registerEvent(event2);
handler->registerEvent(event2.get());
event1->addCallback(&this->callbackFcn, CL_COMPLETE, &event1Counter);
handler->registerEvent(event1);
handler->registerEvent(event1.get());
event3->addCallback(&this->callbackFcn, CL_COMPLETE, &event3Counter);
handler->registerEvent(event3);
handler->registerEvent(event3.get());
auto sleepCandidate = handler->process();
EXPECT_EQ(event1, sleepCandidate);
EXPECT_EQ(event1.get(), sleepCandidate);
event1->setStatus(CL_COMPLETE);
event2->setStatus(CL_COMPLETE);
@ -327,12 +327,12 @@ TEST_F(AsyncEventsHandlerTests, givenEventWithoutCallbacksWhenProcessedThenDontR
event1->setTaskStamp(0, 1);
event2->setTaskStamp(0, 2);
handler->registerEvent(event1);
handler->registerEvent(event1.get());
event2->addCallback(&this->callbackFcn, CL_COMPLETE, &counter);
handler->registerEvent(event2);
handler->registerEvent(event2.get());
auto sleepCandidate = handler->process();
EXPECT_EQ(event2, sleepCandidate);
EXPECT_EQ(event2.get(), sleepCandidate);
event2->setStatus(CL_COMPLETE);
}
@ -340,7 +340,7 @@ TEST_F(AsyncEventsHandlerTests, givenEventWithoutCallbacksWhenProcessedThenDontR
TEST_F(AsyncEventsHandlerTests, givenSleepCandidateWhenProcessedThenCallWaitWithQuickKmdSleepRequest) {
event1->setTaskStamp(0, 1);
event1->addCallback(&this->callbackFcn, CL_COMPLETE, &counter);
handler->registerEvent(event1);
handler->registerEvent(event1.get());
handler->allowAsyncProcess.store(true);
// break infinite loop after first iteration

View File

@ -24,12 +24,12 @@
namespace NEO {
struct SmallEventBuilderEventMock : MockEvent<Event> {
SmallEventBuilderEventMock(int param1, float param2)
: MockEvent<Event>(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0), constructionParam1(param1), constructionParam2(param2) {
SmallEventBuilderEventMock(CommandQueue *commandQueue, int param1, float param2)
: MockEvent<Event>(commandQueue, CL_COMMAND_NDRANGE_KERNEL, 0, 0), constructionParam1(param1), constructionParam2(param2) {
}
SmallEventBuilderEventMock()
: SmallEventBuilderEventMock(1, 2.0f) {
SmallEventBuilderEventMock(CommandQueue *commandQueue)
: SmallEventBuilderEventMock(commandQueue, 1, 2.0f) {
}
void overrideMagic(cl_long newMagic) {
@ -54,12 +54,16 @@ struct SmallEventBuilderMock : EventBuilder {
};
TEST(EventBuilder, whenCreatingNewEventForwardsArgumentsToEventConstructor) {
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
MockContext context(device.get());
MockCommandQueue cmdQ(&context, device.get(), nullptr);
EventBuilder eventBuilder;
EXPECT_EQ(nullptr, eventBuilder.getEvent());
constexpr int constrParam1 = 7;
constexpr float constrParam2 = 13.0f;
eventBuilder.create<SmallEventBuilderEventMock>(constrParam1, constrParam2);
eventBuilder.create<SmallEventBuilderEventMock>(&cmdQ, constrParam1, constrParam2);
Event *peekedEvent = eventBuilder.getEvent();
ASSERT_NE(nullptr, peekedEvent);
auto finalizedEvent = static_cast<SmallEventBuilderEventMock *>(eventBuilder.finalizeAndRelease());
@ -79,7 +83,8 @@ TEST(EventBuilder, givenVirtualEventWithCommandThenFinalizeAddChild) {
};
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
MockCommandQueue cmdQ(nullptr, device.get(), nullptr);
MockContext context(device.get());
MockCommandQueue cmdQ(&context, device.get(), nullptr);
MockKernelWithInternals kernel(*device);
IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr;
@ -94,7 +99,7 @@ TEST(EventBuilder, givenVirtualEventWithCommandThenFinalizeAddChild) {
std::unique_ptr<MockCommandComputeKernel> command = std::make_unique<MockCommandComputeKernel>(cmdQ, kernelOperation, surfaces, kernel);
VirtualEvent virtualEvent;
VirtualEvent virtualEvent(&cmdQ);
virtualEvent.setCommand(std::move(command));
EventBuilder eventBuilder;
@ -102,7 +107,7 @@ TEST(EventBuilder, givenVirtualEventWithCommandThenFinalizeAddChild) {
constexpr int constrParam1 = 7;
constexpr float constrParam2 = 13.0f;
eventBuilder.create<SmallEventBuilderEventMock>(constrParam1, constrParam2);
eventBuilder.create<SmallEventBuilderEventMock>(&cmdQ, constrParam1, constrParam2);
Event *peekedEvent = eventBuilder.getEvent();
ASSERT_NE(nullptr, peekedEvent);
virtualEvent.taskLevel = CL_SUBMITTED;
@ -128,7 +133,8 @@ TEST(EventBuilder, givenVirtualEventWithSubmittedCommandAsParentThenFinalizeNotA
};
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
MockCommandQueue cmdQ(nullptr, device.get(), nullptr);
MockContext context(device.get());
MockCommandQueue cmdQ(&context, device.get(), nullptr);
MockKernelWithInternals kernel(*device);
IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr;
@ -152,7 +158,7 @@ TEST(EventBuilder, givenVirtualEventWithSubmittedCommandAsParentThenFinalizeNotA
constexpr int constrParam1 = 7;
constexpr float constrParam2 = 13.0f;
eventBuilder.create<SmallEventBuilderEventMock>(constrParam1, constrParam2);
eventBuilder.create<SmallEventBuilderEventMock>(&cmdQ, constrParam1, constrParam2);
Event *peekedEvent = eventBuilder.getEvent();
ASSERT_NE(nullptr, peekedEvent);
virtualEvent.taskLevel = CL_SUBMITTED;
@ -164,11 +170,15 @@ TEST(EventBuilder, givenVirtualEventWithSubmittedCommandAsParentThenFinalizeNotA
}
TEST(EventBuilder, whenDestroyingEventBuilderImplicitFinalizeIscalled) {
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
MockContext context(device.get());
MockCommandQueue cmdQ(&context, device.get(), nullptr);
SmallEventBuilderEventMock *ev = nullptr;
auto parentEvent = new UserEvent;
{
EventBuilder eventBuilder{};
eventBuilder.create<SmallEventBuilderEventMock>();
eventBuilder.create<SmallEventBuilderEventMock>(&cmdQ);
eventBuilder.addParentEvent(*parentEvent);
ev = static_cast<SmallEventBuilderEventMock *>(eventBuilder.getEvent());
ASSERT_NE(nullptr, ev);
@ -181,14 +191,18 @@ TEST(EventBuilder, whenDestroyingEventBuilderImplicitFinalizeIscalled) {
}
TEST(EventBuilder, whenFinalizeIsCalledTwiceOnEventBuilderThenSecondRequestIsDropped) {
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
MockContext context(device.get());
MockCommandQueue cmdQ(&context, device.get(), nullptr);
SmallEventBuilderEventMock *ev = nullptr;
EventBuilder eventBuilder{};
eventBuilder.create<SmallEventBuilderEventMock>();
eventBuilder.create<SmallEventBuilderEventMock>(&cmdQ);
ev = static_cast<SmallEventBuilderEventMock *>(eventBuilder.getEvent());
ASSERT_NE(nullptr, ev);
eventBuilder.finalize();
auto *falseParentEvent = new UserEvent();
auto *falseChildEvent = new SmallEventBuilderEventMock;
auto *falseChildEvent = new SmallEventBuilderEventMock(&cmdQ);
auto numParents = ev->peekNumEventsBlockingThis();
auto numChildren = (ev->peekChildEvents() != nullptr) ? 1U + ev->peekChildEvents()->countSuccessors() : 0;
eventBuilder.addParentEvent(*falseParentEvent);
@ -202,8 +216,12 @@ TEST(EventBuilder, whenFinalizeIsCalledTwiceOnEventBuilderThenSecondRequestIsDro
}
TEST(EventBuilder, whenFinalizeAndReleaseIsCalledThenEventBuilderReleasesReferenceToEvent) {
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
MockContext context(device.get());
MockCommandQueue cmdQ(&context, device.get(), nullptr);
EventBuilder eventBuilder;
eventBuilder.create<SmallEventBuilderEventMock>();
eventBuilder.create<SmallEventBuilderEventMock>(&cmdQ);
auto ev = static_cast<SmallEventBuilderEventMock *>(eventBuilder.finalizeAndRelease());
ASSERT_NE(nullptr, ev);
ASSERT_EQ(nullptr, eventBuilder.getEvent());
@ -224,6 +242,10 @@ TEST(EventBuilder, whenClearIsCalledThenAllEventsAndReferencesAreDropped) {
}
TEST(EventBuilder, whenCParentEventsGetAddedThenTheirReferenceCountGetsIncreasedUntilFinalizeIsCalled) {
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
MockContext context(device.get());
MockCommandQueue cmdQ(&context, device.get(), nullptr);
UserEvent evParent1;
UserEvent evParent2;
@ -231,7 +253,7 @@ TEST(EventBuilder, whenCParentEventsGetAddedThenTheirReferenceCountGetsIncreased
EXPECT_EQ(1, evParent2.getRefInternalCount());
EventBuilder eventBuilder;
eventBuilder.create<SmallEventBuilderEventMock>();
eventBuilder.create<SmallEventBuilderEventMock>(&cmdQ);
eventBuilder.addParentEvent(evParent1);
EXPECT_EQ(2, evParent1.getRefInternalCount());
eventBuilder.addParentEvent(evParent2);
@ -305,7 +327,11 @@ TEST(EventBuilder, whenAddingNullptrAsNewParentEventThenItIsIgnored) {
}
TEST(EventBuilder, whenAddingValidEventAsNewParentEventThenItIsProperlyAddedToParentsList) {
auto event = new SmallEventBuilderEventMock;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
MockContext context(device.get());
MockCommandQueue cmdQ(&context, device.get(), nullptr);
auto event = new SmallEventBuilderEventMock(&cmdQ);
SmallEventBuilderMock eventBuilder;
eventBuilder.create<MockEvent<Event>>(nullptr, CL_COMMAND_MARKER, 0, 0);
EXPECT_EQ(0U, eventBuilder.getParentEvents().size());
@ -317,8 +343,12 @@ TEST(EventBuilder, whenAddingValidEventAsNewParentEventThenItIsProperlyAddedToPa
}
TEST(EventBuilder, whenAddingMultipleEventsAsNewParentsThenOnlyValidOnesAreInsertedIntoParentsList) {
auto event = new SmallEventBuilderEventMock;
auto invalidEvent = new SmallEventBuilderEventMock;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
MockContext context(device.get());
MockCommandQueue cmdQ(&context, device.get(), nullptr);
auto event = new SmallEventBuilderEventMock(&cmdQ);
auto invalidEvent = new SmallEventBuilderEventMock(&cmdQ);
invalidEvent->overrideMagic(0);
cl_event eventsList[] = {nullptr, event, invalidEvent};
SmallEventBuilderMock eventBuilder;

View File

@ -87,7 +87,7 @@ TEST(Event, givenEventWithHigherTaskCountWhenLowerTaskCountIsBeingSetThenTaskCou
Event *event = new Event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 4, 10);
EXPECT_EQ(10u, event->peekTaskCount());
event->updateTaskCount(8);
event->updateTaskCount(8, 0);
EXPECT_EQ(10u, event->peekTaskCount());
delete event;
}
@ -601,7 +601,7 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut
TEST_F(InternalsEventTest, GivenMapOperationWhenSubmittingCommandsThenTaskLevelIsIncremented) {
auto pCmdQ = make_releaseable<MockCommandQueue>(mockContext, pClDevice, nullptr);
MockEvent<Event> event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
MockEvent<Event> event(pCmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0);
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
auto buffer = new MockBuffer;
@ -622,7 +622,7 @@ TEST_F(InternalsEventTest, GivenMapOperationWhenSubmittingCommandsThenTaskLevelI
TEST_F(InternalsEventTest, GivenMapOperationNonZeroCopyBufferWhenSubmittingCommandsThenTaskLevelIsIncremented) {
auto pCmdQ = make_releaseable<MockCommandQueue>(mockContext, pClDevice, nullptr);
MockEvent<Event> event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
MockEvent<Event> event(pCmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0);
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
auto buffer = new UnalignedBuffer;
@ -734,7 +734,7 @@ TEST_F(InternalsEventTest, GivenProfilingWHENMapOperationTHENTimesSet) {
TEST_F(InternalsEventTest, GivenUnMapOperationWhenSubmittingCommandsThenTaskLevelIsIncremented) {
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
auto pCmdQ = make_releaseable<MockCommandQueue>(mockContext, pClDevice, props);
MockEvent<Event> event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
MockEvent<Event> event(pCmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0);
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
auto buffer = new UnalignedBuffer;
@ -756,7 +756,7 @@ TEST_F(InternalsEventTest, GivenUnMapOperationWhenSubmittingCommandsThenTaskLeve
TEST_F(InternalsEventTest, givenBlockedMapCommandWhenSubmitIsCalledItReleasesMemObjectReference) {
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
auto pCmdQ = std::make_unique<MockCommandQueue>(mockContext, pClDevice, props);
MockEvent<Event> event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
MockEvent<Event> event(pCmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0);
auto buffer = new UnalignedBuffer;
@ -775,7 +775,7 @@ TEST_F(InternalsEventTest, givenBlockedMapCommandWhenSubmitIsCalledItReleasesMem
TEST_F(InternalsEventTest, GivenUnMapOperationNonZeroCopyBufferWhenSubmittingCommandsThenTaskLevelIsIncremented) {
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
auto pCmdQ = std::make_unique<MockCommandQueue>(mockContext, pClDevice, props);
MockEvent<Event> event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
MockEvent<Event> event(pCmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0);
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
auto buffer = new UnalignedBuffer;
@ -1398,7 +1398,7 @@ HWTEST_F(EventTest, givenQuickKmdSleepRequestWhenWaitIsCalledThenPassRequestToWa
pDevice->resetCommandStreamReceiver(csr);
Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
event.updateCompletionStamp(1u, 1u, 1u);
event.updateCompletionStamp(1u, 0, 1u, 1u);
EXPECT_CALL(*csr, waitForCompletionWithTimeout(::testing::_,
localHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds, ::testing::_))
@ -1426,7 +1426,7 @@ HWTEST_F(EventTest, givenNonQuickKmdSleepRequestWhenWaitIsCalledThenPassRequestT
pDevice->resetCommandStreamReceiver(csr);
Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0);
event.updateCompletionStamp(1u, 1u, 1u);
event.updateCompletionStamp(1u, 0, 1u, 1u);
EXPECT_CALL(*csr, waitForCompletionWithTimeout(::testing::_,
localHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds, ::testing::_))

View File

@ -158,7 +158,7 @@ TEST(EventsTracker, whenCallDumpEdgeThenGetStringWithProperLabelOfDumpedEdge) {
TEST(EventsTracker, givenEventWithTaskLevelAndCountNotReadyThenDumpingNodeWithNotReadyLabels) {
UserEvent uEvent;
uEvent.taskLevel = CompletionStamp::notReady;
uEvent.updateTaskCount(CompletionStamp::notReady);
uEvent.updateTaskCount(CompletionStamp::notReady, 0);
std::stringstream stream;
std::unordered_map<Event *, int64_t> map;
@ -175,7 +175,7 @@ TEST(EventsTracker, givenEventWithTaskLevelAndCountNotReadyThenDumpingNodeWithNo
TEST(EventsTracker, whenCallDumpNodeFunctionThenDumpingNodeWithProperTaskLevelAndCountValues) {
UserEvent uEvent;
uEvent.taskLevel = 1;
uEvent.updateTaskCount(1);
uEvent.updateTaskCount(1, 0);
std::stringstream stream;
std::unordered_map<Event *, int64_t> map;
@ -232,7 +232,7 @@ TEST(EventsTracker, givenCmdqAndItsVirtualEventThenDumpingWithProperLabels) {
MockCommandQueue cmdq;
VirtualEvent vEvent(&cmdq, &ctx);
vEvent.setCurrentCmdQVirtualEvent(true);
vEvent.updateTaskCount(1);
vEvent.updateTaskCount(1, 0);
std::stringstream stream;
std::unordered_map<Event *, int64_t> map;
@ -395,7 +395,7 @@ TEST(EventsTracker, givenCmdqAndItsVirtualEventThenDumpingProperGraph) {
MockCommandQueue cmdq;
VirtualEvent vEvent(&cmdq, &ctx);
vEvent.setCurrentCmdQVirtualEvent(true);
vEvent.updateTaskCount(1);
vEvent.updateTaskCount(1, 0);
std::stringstream stream;
std::unordered_map<Event *, int64_t> map;
@ -434,9 +434,9 @@ TEST(EventsTracker, givenTwoEventsWithCommonParentEventThenDumpingProperGraph) {
EXPECT_STREQ(expected.str().c_str(), stream.str().c_str());
uEventChild1.updateCompletionStamp(0, 0, 0);
uEventChild2.updateCompletionStamp(0, 0, 0);
uEvent.updateCompletionStamp(0, 0, 0);
uEventChild1.updateCompletionStamp(0, 0, 0, 0);
uEventChild2.updateCompletionStamp(0, 0, 0, 0);
uEvent.updateCompletionStamp(0, 0, 0, 0);
uEvent.setStatus(0);
}
@ -611,10 +611,10 @@ TEST(EventsTracker, givenEventsWithDependenciesBetweenThemThenDumpingProperGraph
EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str());
uEventChild1.updateCompletionStamp(0, 0, 0);
uEventChild2.updateCompletionStamp(0, 0, 0);
uEvent2.updateCompletionStamp(0, 0, 0);
uEvent1.updateCompletionStamp(0, 0, 0);
uEventChild1.updateCompletionStamp(0, 0, 0, 0);
uEventChild2.updateCompletionStamp(0, 0, 0, 0);
uEvent2.updateCompletionStamp(0, 0, 0, 0);
uEvent1.updateCompletionStamp(0, 0, 0, 0);
uEvent2.setStatus(0);
uEvent1.setStatus(0);
}

View File

@ -98,7 +98,7 @@ HWTEST_F(KmdNotifyTests, givenTaskCountWhenWaitUntilCompletionCalledThenAlwaysTr
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 2, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false);
cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false);
}
HWTEST_F(KmdNotifyTests, givenTaskCountAndKmdNotifyDisabledWhenWaitUntilCompletionCalledThenTryCpuPollingWithoutTimeout) {
@ -108,7 +108,7 @@ HWTEST_F(KmdNotifyTests, givenTaskCountAndKmdNotifyDisabledWhenWaitUntilCompleti
EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0);
cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false);
cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false);
}
HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenWaitUntilCompletionCalledThenTryCpuPollingAndKmdWait) {
@ -121,7 +121,7 @@ HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenWaitUntilCompletionCalledThen
EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, taskCountToWait)).Times(1).WillOnce(::testing::Return(false));
// This case ends in an unrecoverable error, so the call is expected to throw.
EXPECT_THROW(cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false), std::exception);
EXPECT_THROW(cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false), std::exception);
}
HWTEST_F(KmdNotifyTests, givenReadyTaskCountWhenWaitUntilCompletionCalledThenTryCpuPollingAndDontCallKmdWait) {
@ -131,7 +131,7 @@ HWTEST_F(KmdNotifyTests, givenReadyTaskCountWhenWaitUntilCompletionCalledThenTry
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 2, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0);
cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false);
cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false);
}
HWTEST_F(KmdNotifyTests, givenDefaultArgumentWhenWaitUntilCompleteIsCalledThenDisableQuickKmdSleep) {
@ -140,7 +140,7 @@ HWTEST_F(KmdNotifyTests, givenDefaultArgumentWhenWaitUntilCompleteIsCalledThenDi
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false);
cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false);
}
HWTEST_F(KmdNotifyTests, givenEnabledQuickSleepWhenWaitUntilCompleteIsCalledThenChangeDelayValue) {
@ -149,7 +149,7 @@ HWTEST_F(KmdNotifyTests, givenEnabledQuickSleepWhenWaitUntilCompleteIsCalledThen
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, true);
cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, true);
}
HWTEST_F(KmdNotifyTests, givenDisabledQuickSleepWhenWaitUntilCompleteWithQuickSleepRequestIsCalledThenUseBaseDelayValue) {
@ -159,7 +159,7 @@ HWTEST_F(KmdNotifyTests, givenDisabledQuickSleepWhenWaitUntilCompleteWithQuickSl
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, true);
cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, true);
}
HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenPollForCompletionCalledThenTimeout) {
@ -213,7 +213,7 @@ HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModeAndCal
auto csr = createMockCsr<FamilyType>();
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 1, ::testing::_)).Times(1).WillOnce(::testing::Return(true));
cmdQ->throttle = QueueThrottle::LOW;
cmdQ->waitUntilComplete(1, 1, false);
cmdQ->waitUntilComplete(1, 0, 1, false);
}
HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModButThereIsNoFlushStampeAndCallWaitThenTimeoutIsDisabled) {
@ -222,7 +222,7 @@ HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModButTher
EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, ::testing::_)).Times(1).WillOnce(::testing::Return(true));
cmdQ->throttle = QueueThrottle::LOW;
cmdQ->waitUntilComplete(1, 0, false);
cmdQ->waitUntilComplete(1, 0, 0, false);
}
HWTEST_F(KmdNotifyTests, givenQuickSleepRequestWhenItsSporadicWaitOptimizationIsDisabledThenDontOverrideQuickSleepRequest) {
@ -408,4 +408,3 @@ TEST_F(KmdNotifyTests, givenEnabledKmdNotifyMechanismWhenPowerSavingModeIsSetAnd
EXPECT_FALSE(timeoutEnabled);
EXPECT_EQ(0, timeout);
}

View File

@ -60,9 +60,9 @@ class MockCommandQueue : public CommandQueue {
return writeBufferRetValue;
}
void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
latestTaskCountWaited = taskCountToWait;
return CommandQueue::waitUntilComplete(taskCountToWait, flushStampToWait, useQuickKmdSleep);
// Mock override of waitUntilComplete: stores the requested GPGPU task count in
// latestTaskCountWaited, then forwards every argument unchanged to
// CommandQueue::waitUntilComplete.
// Only gpgpuTaskCountToWait is recorded; bcsTaskCountToWait is passed through
// without being stored.
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
// Recorded before delegating, so the value is set even if the base call does not return normally.
latestTaskCountWaited = gpgpuTaskCountToWait;
return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, bcsTaskCountToWait, flushStampToWait, useQuickKmdSleep);
}
cl_int enqueueCopyImage(Image *srcImage, Image *dstImage, const size_t srcOrigin[3],
@ -272,9 +272,9 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
notifyEnqueueReadImageCalled = true;
}
void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
latestTaskCountWaited = taskCountToWait;
return BaseClass::waitUntilComplete(taskCountToWait, flushStampToWait, useQuickKmdSleep);
// Mock override of waitUntilComplete: stores the requested GPGPU task count in
// latestTaskCountWaited, then forwards every argument unchanged to
// BaseClass::waitUntilComplete.
// Only gpgpuTaskCountToWait is recorded; bcsTaskCountToWait is passed through
// without being stored.
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
// Recorded before delegating, so the value is set even if the base call does not return normally.
latestTaskCountWaited = gpgpuTaskCountToWait;
return BaseClass::waitUntilComplete(gpgpuTaskCountToWait, bcsTaskCountToWait, flushStampToWait, useQuickKmdSleep);
}
bool isCacheFlushForBcsRequired() const override {