Removed unused atomic latestTaskCountWaited

Related-To: NEO-3869

Change-Id: If7e9db2645c9c9c38d65a0b200960e387441c9ca
Signed-off-by: Konstanty Misiak <konstanty.misiak@intel.com>
This commit is contained in:
Misiak
2020-01-13 15:54:52 +01:00
committed by sys_ocldev
parent bd9cd46ab9
commit b498fea989
11 changed files with 148 additions and 105 deletions

View File

@@ -181,44 +181,44 @@ HWTEST_F(CommandQueueHwTest, WhenAddMapUnmapToWaitlistEventsThenDependenciesAreN
HWTEST_F(CommandQueueHwTest, givenMapCommandWhenZeroStateCommandIsSubmittedThenTaskCountIsNotBeingWaited) {
auto buffer = new MockBuffer;
CommandQueueHw<FamilyType> *pHwQ = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
MockCommandQueueHw<FamilyType> mockCmdQueueHw(context, pClDevice, nullptr);
MockEventBuilder eventBuilder;
MemObjSizeArray size = {{1, 1, 1}};
MemObjOffsetArray offset = {{0, 0, 0}};
pHwQ->enqueueBlockedMapUnmapOperation(nullptr,
0,
MAP,
buffer,
size, offset, false,
eventBuilder);
mockCmdQueueHw.enqueueBlockedMapUnmapOperation(nullptr,
0,
MAP,
buffer,
size, offset, false,
eventBuilder);
EXPECT_NE(nullptr, pHwQ->virtualEvent);
pHwQ->virtualEvent->setStatus(CL_COMPLETE);
EXPECT_NE(nullptr, mockCmdQueueHw.virtualEvent);
mockCmdQueueHw.virtualEvent->setStatus(CL_COMPLETE);
EXPECT_EQ(std::numeric_limits<uint32_t>::max(), mockCmdQueueHw.latestTaskCountWaited);
EXPECT_EQ(std::numeric_limits<uint32_t>::max(), pHwQ->latestTaskCountWaited);
buffer->decRefInternal();
}
HWTEST_F(CommandQueueHwTest, givenMapCommandWhenZeroStateCommandIsSubmittedOnNonZeroCopyBufferThenTaskCountIsBeingWaited) {
auto buffer = new MockBuffer;
buffer->isZeroCopy = false;
CommandQueueHw<FamilyType> *pHwQ = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
MockCommandQueueHw<FamilyType> mockCmdQueueHw(context, pClDevice, nullptr);
MockEventBuilder eventBuilder;
MemObjSizeArray size = {{1, 1, 1}};
MemObjOffsetArray offset = {{0, 0, 0}};
pHwQ->enqueueBlockedMapUnmapOperation(nullptr,
0,
MAP,
buffer,
size, offset, false,
eventBuilder);
mockCmdQueueHw.enqueueBlockedMapUnmapOperation(nullptr,
0,
MAP,
buffer,
size, offset, false,
eventBuilder);
EXPECT_NE(nullptr, pHwQ->virtualEvent);
pHwQ->virtualEvent->setStatus(CL_COMPLETE);
EXPECT_NE(nullptr, mockCmdQueueHw.virtualEvent);
mockCmdQueueHw.virtualEvent->setStatus(CL_COMPLETE);
EXPECT_EQ(1u, mockCmdQueueHw.latestTaskCountWaited);
EXPECT_EQ(1u, pHwQ->latestTaskCountWaited);
buffer->decRefInternal();
}

View File

@@ -1,10 +1,11 @@
/*
* Copyright (C) 2017-2019 Intel Corporation
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "runtime/command_queue/command_queue_hw.h"
#include "runtime/command_queue/gpgpu_walker.h"
#include "runtime/command_stream/command_stream_receiver.h"
#include "runtime/event/user_event.h"
@@ -12,6 +13,7 @@
#include "test.h"
#include "unit_tests/command_queue/command_enqueue_fixture.h"
#include "unit_tests/gen_common/gen_cmd_parse.h"
#include "unit_tests/mocks/mock_command_queue.h"
using namespace NEO;
@@ -187,10 +189,12 @@ HWTEST_F(BarrierTest, WhenEnqueingBarrierWithWaitListThenDependenciesShouldSync)
delete pEvent;
}
HWTEST_F(BarrierTest, givenNotBlockedCommandQueueAndEnqueueBarrierWithWaitlistReturningEventWhenCallIsMadeThenDontWaitUntilEventIsSignaled) {
MockCommandQueueHw<FamilyType> mockCmdQueue(context, pClDevice, nullptr);
// In N:1, event.level <= pCmdQ.level
Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
Event event3(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 1, 17);
Event event1(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
Event event2(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
Event event3(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 1, 17);
cl_event eventWaitList[] =
{
&event1,
@@ -199,17 +203,18 @@ HWTEST_F(BarrierTest, givenNotBlockedCommandQueueAndEnqueueBarrierWithWaitlistRe
cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);
cl_event event = nullptr;
auto latestTaskCountWaitedBeforeEnqueue = this->pCmdQ->latestTaskCountWaited.load();
auto retVal = pCmdQ->enqueueBarrierWithWaitList(
auto latestTaskCountWaitedBeforeEnqueue = mockCmdQueue.latestTaskCountWaited.load();
auto retVal = mockCmdQueue.enqueueBarrierWithWaitList(
numEventsInWaitList,
eventWaitList,
&event);
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
auto &csr = mockCmdQueue.getGpgpuCommandStreamReceiver();
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(latestTaskCountWaitedBeforeEnqueue, this->pCmdQ->latestTaskCountWaited);
EXPECT_EQ(latestTaskCountWaitedBeforeEnqueue, mockCmdQueue.latestTaskCountWaited);
auto pEvent = castToObject<Event>(event);
EXPECT_NE(nullptr, pEvent);
if (csr.peekTimestampPacketWriteEnabled()) {
EXPECT_EQ(csr.peekTaskCount(), pEvent->peekTaskCount());

View File

@@ -555,15 +555,42 @@ HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfWhenBeingDispatchedThenL3
patchData.Size = 256;
patchData.DataParamOffset = 64;
MockCommandQueueHw<FamilyType> mockCmdQueue(context, pClDevice, nullptr);
MockKernelWithInternals mockKernel(*pClDevice);
mockKernel.crossThreadData[64] = 0;
mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrintfSurface = &patchData;
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
auto &csr = mockCmdQueue.getGpgpuCommandStreamReceiver();
auto latestSentTaskCount = csr.peekTaskCount();
enqueueKernel<FamilyType, false>(mockKernel);
cl_uint workDim = 1;
size_t globalWorkOffset[3] = {0, 0, 0};
cl_uint numEventsInWaitList = 0;
cl_event *eventWaitList = nullptr;
cl_event *event = nullptr;
FillValues();
// Compute # of expected work items
expectedWorkItems = 1;
for (auto i = 0u; i < workDim; i++) {
expectedWorkItems *= globalWorkSize[i];
}
auto retVal = mockCmdQueue.enqueueKernel(
mockKernel,
workDim,
globalWorkOffset,
globalWorkSize,
localWorkSize,
numEventsInWaitList,
eventWaitList,
event);
ASSERT_EQ(CL_SUCCESS, retVal);
auto newLatestSentTaskCount = csr.peekTaskCount();
EXPECT_GT(newLatestSentTaskCount, latestSentTaskCount);
EXPECT_EQ(pCmdQ->latestTaskCountWaited, newLatestSentTaskCount);
EXPECT_EQ(mockCmdQueue.latestTaskCountWaited, newLatestSentTaskCount);
}
HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelPrintfTest, GivenKernelWithPrintfBlockedByEventWhenEventUnblockedThenL3CacheIsFlushed) {
@@ -575,10 +602,11 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelPrintfTest, GivenKernelWithPrintfBlocke
patchData.Size = 256;
patchData.DataParamOffset = 64;
MockCommandQueueHw<FamilyType> mockCommandQueue(context, pClDevice, nullptr);
MockKernelWithInternals mockKernel(*pClDevice);
mockKernel.crossThreadData[64] = 0;
mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrintfSurface = &patchData;
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
auto &csr = mockCommandQueue.getGpgpuCommandStreamReceiver();
auto latestSentDcFlushTaskCount = csr.peekTaskCount();
cl_uint workDim = 1;
@@ -587,7 +615,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelPrintfTest, GivenKernelWithPrintfBlocke
FillValues();
cl_event blockedEvent = &userEvent;
auto retVal = pCmdQ->enqueueKernel(
auto retVal = mockCommandQueue.enqueueKernel(
mockKernel,
workDim,
globalWorkOffset,
@@ -600,11 +628,11 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelPrintfTest, GivenKernelWithPrintfBlocke
userEvent.setStatus(CL_COMPLETE);
parseCommands<FamilyType>(*pCmdQ);
parseCommands<FamilyType>(mockCommandQueue);
auto newLatestSentDCFlushTaskCount = csr.peekTaskCount();
EXPECT_GT(newLatestSentDCFlushTaskCount, latestSentDcFlushTaskCount);
EXPECT_EQ(pCmdQ->latestTaskCountWaited, newLatestSentDCFlushTaskCount);
EXPECT_EQ(mockCommandQueue.latestTaskCountWaited, newLatestSentDCFlushTaskCount);
}
HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfBlockedByEventWhenEventUnblockedThenOutputPrinted) {

View File

@@ -14,6 +14,7 @@
#include "unit_tests/fixtures/buffer_fixture.h"
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/mocks/mock_buffer.h"
#include "unit_tests/mocks/mock_command_queue.h"
#include "unit_tests/mocks/mock_context.h"
#include "unit_tests/mocks/mock_kernel.h"
@@ -260,7 +261,7 @@ TEST_F(EnqueueMapBufferTest, GivenValidArgsWhenMappingBufferThenSuccessIsReturne
EXPECT_EQ(CL_SUCCESS, retVal);
}
TEST_F(EnqueueMapBufferTest, givenNonBlockingReadOnlyMapBufferOnZeroCopyBufferWhenItIsCalledThenSynchronizationIsNotMadeUntilWaitForEvents) {
HWTEST_F(EnqueueMapBufferTest, givenNonBlockingReadOnlyMapBufferOnZeroCopyBufferWhenItIsCalledThenSynchronizationIsNotMadeUntilWaitForEvents) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableAsyncEventsHandler.set(false);
cl_event mapEventReturned = nullptr;
@@ -285,18 +286,20 @@ TEST_F(EnqueueMapBufferTest, givenNonBlockingReadOnlyMapBufferOnZeroCopyBufferWh
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(nullptr, buffer);
auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver();
MockCommandQueueHw<FamilyType> mockCmdQueue(context, pClDevice, nullptr);
auto &commandStreamReceiver = mockCmdQueue.getGpgpuCommandStreamReceiver();
uint32_t taskCount = commandStreamReceiver.peekTaskCount();
EXPECT_EQ(0u, taskCount);
// enqueue something that can be finished...
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, 0, &GWS, nullptr, 0, nullptr, nullptr);
retVal = clEnqueueNDRangeKernel(&mockCmdQueue, kernel, 1, 0, &GWS, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(retVal, CL_SUCCESS);
EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount());
auto ptrResult = clEnqueueMapBuffer(
pCmdQ,
&mockCmdQueue,
buffer,
CL_FALSE,
CL_MAP_READ,
@@ -334,14 +337,14 @@ TEST_F(EnqueueMapBufferTest, givenNonBlockingReadOnlyMapBufferOnZeroCopyBufferWh
//wait for event do not sent flushTask
EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount());
EXPECT_EQ(1u, pCmdQ->latestTaskCountWaited);
EXPECT_EQ(1u, mockCmdQueue.latestTaskCountWaited);
EXPECT_TRUE(neoEvent->updateStatusAndCheckCompletion());
EXPECT_EQ(1u, callbackCalled);
retVal = clEnqueueUnmapMemObject(
pCmdQ,
&mockCmdQueue,
buffer,
ptrResult,
0,
@@ -514,7 +517,7 @@ TEST_F(EnqueueMapBufferTest, givenNonBlockingMapBufferAfterL3IsAlreadyFlushedThe
clReleaseEvent(eventReturned);
}
TEST_F(EnqueueMapBufferTest, GivenBufferThatIsNotZeroCopyWhenNonBlockingMapIsCalledThenFinishIsCalledAndDataTransferred) {
HWTEST_F(EnqueueMapBufferTest, GivenBufferThatIsNotZeroCopyWhenNonBlockingMapIsCalledThenFinishIsCalledAndDataTransferred) {
const auto bufferSize = 100;
auto localSize = bufferSize;
char misaligned[bufferSize] = {1};
@@ -540,16 +543,18 @@ TEST_F(EnqueueMapBufferTest, GivenBufferThatIsNotZeroCopyWhenNonBlockingMapIsCal
auto pBuffer = castToObject<Buffer>(buffer);
ASSERT_FALSE(pBuffer->isMemObjZeroCopy());
MockCommandQueueHw<FamilyType> mockCmdQueue(context, pClDevice, nullptr);
// enqueue something that can be finished
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, 0, &GWS, nullptr, 0, nullptr, nullptr);
retVal = clEnqueueNDRangeKernel(&mockCmdQueue, kernel, 1, 0, &GWS, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(retVal, CL_SUCCESS);
auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver();
auto &commandStreamReceiver = mockCmdQueue.getGpgpuCommandStreamReceiver();
uint32_t taskCount = commandStreamReceiver.peekTaskCount();
EXPECT_EQ(1u, taskCount);
auto ptrResult = clEnqueueMapBuffer(
pCmdQ,
&mockCmdQueue,
buffer,
CL_FALSE,
CL_MAP_READ,
@@ -566,7 +571,7 @@ TEST_F(EnqueueMapBufferTest, GivenBufferThatIsNotZeroCopyWhenNonBlockingMapIsCal
commandStreamReceiver.peekTaskCount();
EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount());
EXPECT_EQ(1u, pCmdQ->latestTaskCountWaited);
EXPECT_EQ(1u, mockCmdQueue.latestTaskCountWaited);
retVal = clReleaseMemObject(buffer);
EXPECT_EQ(CL_SUCCESS, retVal);