fix: don't allocate TSP for OOQ without Event

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-07-05 19:13:19 +00:00
committed by Compute-Runtime-Automation
parent c892b8c6f3
commit d96cf5846a
9 changed files with 191 additions and 38 deletions

View File

@@ -1281,17 +1281,10 @@ WaitStatus CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *pri
return WaitStatus::GpuHang; return WaitStatus::GpuHang;
} }
TimestampPacketContainer nodesToRelease;
if (deferredTimestampPackets) {
deferredTimestampPackets->swapNodes(nodesToRelease);
}
TimestampPacketContainer multiRootSyncNodesToRelease;
if (deferredMultiRootSyncNodes.get()) {
deferredMultiRootSyncNodes->swapNodes(multiRootSyncNodesToRelease);
}
waitStatus = waitUntilComplete(taskCount, activeBcsStates, flushStamp->peekStamp(), false, cleanTemporaryAllocationsList, waitedOnTimestamps); waitStatus = waitUntilComplete(taskCount, activeBcsStates, flushStamp->peekStamp(), false, cleanTemporaryAllocationsList, waitedOnTimestamps);
releaseDeferredNodes();
if (printfHandler) { if (printfHandler) {
if (!printfHandler->printEnqueueOutput()) { if (!printfHandler->printEnqueueOutput()) {
return WaitStatus::GpuHang; return WaitStatus::GpuHang;
@@ -1378,4 +1371,15 @@ bool CommandQueue::migrateMultiGraphicsAllocationsIfRequired(const BuiltinOpPara
return migrationHandled; return migrationHandled;
} }
void CommandQueue::releaseDeferredNodes() {
TimestampPacketContainer nodesToRelease;
if (deferredTimestampPackets) {
deferredTimestampPackets->swapNodes(nodesToRelease);
}
TimestampPacketContainer multiRootSyncNodesToRelease;
if (deferredMultiRootSyncNodes.get()) {
deferredMultiRootSyncNodes->swapNodes(multiRootSyncNodesToRelease);
}
}
} // namespace NEO } // namespace NEO

View File

@@ -374,6 +374,10 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
const std::array<CopyEngineState, bcsInfoMaskSize> &peekActiveBcsStates() const { return bcsStates; } const std::array<CopyEngineState, bcsInfoMaskSize> &peekActiveBcsStates() const { return bcsStates; }
void releaseDeferredNodes();
TaskCountType peekTaskCount() const { return taskCount; }
protected: protected:
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet); void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest); cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);

View File

@@ -171,11 +171,13 @@ bool CommandQueueHw<Family>::waitForTimestamps(Range<CopyEngineState> copyEngine
using TSPacketType = typename Family::TimestampPacketType; using TSPacketType = typename Family::TimestampPacketType;
bool waited = false; bool waited = false;
if (isOOQEnabled()) {
// TSP for OOQ dispatch is optional. We need to wait for task count.
return waited;
}
if (isWaitForTimestampsEnabled()) { if (isWaitForTimestampsEnabled()) {
waited = waitForTimestampsWithinContainer<TSPacketType>(mainContainer, getGpgpuCommandStreamReceiver(), status); waited = waitForTimestampsWithinContainer<TSPacketType>(mainContainer, getGpgpuCommandStreamReceiver(), status);
if (isOOQEnabled()) {
waitForTimestampsWithinContainer<TSPacketType>(deferredContainer, getGpgpuCommandStreamReceiver(), status);
}
if (waited) { if (waited) {
getGpgpuCommandStreamReceiver().downloadAllocations(); getGpgpuCommandStreamReceiver().downloadAllocations();

View File

@@ -211,7 +211,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
if (isCacheFlushCommand(commandType) || isMarkerWithPostSyncWrite || isNonStallingIoqBarrierWithDependencies) { if (isCacheFlushCommand(commandType) || isMarkerWithPostSyncWrite || isNonStallingIoqBarrierWithDependencies) {
nodesCount = 1; nodesCount = 1;
} else if (!multiDispatchInfo.empty()) { } else if (!multiDispatchInfo.empty()) {
nodesCount = estimateTimestampPacketNodesCount(multiDispatchInfo); if (isOOQEnabled() && !event) {
// TSP not needed. Release current node.
timestampPacketContainer->moveNodesToNewContainer(*deferredTimestampPackets);
} else {
nodesCount = estimateTimestampPacketNodesCount(multiDispatchInfo);
}
} }
if (isCacheFlushForBcsRequired() && enqueueWithBlitAuxTranslation) { if (isCacheFlushForBcsRequired() && enqueueWithBlitAuxTranslation) {

View File

@@ -68,7 +68,8 @@ inline void HardwareInterface<GfxFamily>::programWalker(
size_t numWorkGroups[3] = {walkerArgs.numberOfWorkgroups->x, walkerArgs.numberOfWorkgroups->y, walkerArgs.numberOfWorkgroups->z}; size_t numWorkGroups[3] = {walkerArgs.numberOfWorkgroups->x, walkerArgs.numberOfWorkgroups->y, walkerArgs.numberOfWorkgroups->z};
auto threadGroupCount = static_cast<uint32_t>(walkerArgs.numberOfWorkgroups->x * walkerArgs.numberOfWorkgroups->y * walkerArgs.numberOfWorkgroups->z); auto threadGroupCount = static_cast<uint32_t>(walkerArgs.numberOfWorkgroups->x * walkerArgs.numberOfWorkgroups->y * walkerArgs.numberOfWorkgroups->z);
if (walkerArgs.currentTimestampPacketNodes && commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { if (walkerArgs.currentTimestampPacketNodes && walkerArgs.currentTimestampPacketNodes->peekNodes().size() > 0 &&
commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
auto timestampPacketNode = walkerArgs.currentTimestampPacketNodes->peekNodes().at(walkerArgs.currentDispatchIndex); auto timestampPacketNode = walkerArgs.currentTimestampPacketNodes->peekNodes().at(walkerArgs.currentDispatchIndex);
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(&commandStream, &walkerCmd, timestampPacketNode, rootDeviceEnvironment); GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(&commandStream, &walkerCmd, timestampPacketNode, rootDeviceEnvironment);
} }

View File

@@ -79,7 +79,8 @@ inline void HardwareInterface<GfxFamily>::programWalker(
auto &queueCsr = commandQueue.getGpgpuCommandStreamReceiver(); auto &queueCsr = commandQueue.getGpgpuCommandStreamReceiver();
auto &rootDeviceEnvironment = commandQueue.getDevice().getRootDeviceEnvironment(); auto &rootDeviceEnvironment = commandQueue.getDevice().getRootDeviceEnvironment();
if (walkerArgs.currentTimestampPacketNodes && queueCsr.peekTimestampPacketWriteEnabled()) {
if (walkerArgs.currentTimestampPacketNodes && (walkerArgs.currentTimestampPacketNodes->peekNodes().size() > 0)) {
auto timestampPacket = walkerArgs.currentTimestampPacketNodes->peekNodes().at(walkerArgs.currentDispatchIndex); auto timestampPacket = walkerArgs.currentTimestampPacketNodes->peekNodes().at(walkerArgs.currentDispatchIndex);
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(&commandStream, &walkerCmd, timestampPacket, rootDeviceEnvironment); GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(&commandStream, &walkerCmd, timestampPacket, rootDeviceEnvironment);
} }
@@ -125,7 +126,8 @@ inline void HardwareInterface<GfxFamily>::programWalker(
auto devices = queueCsr.getOsContext().getDeviceBitfield(); auto devices = queueCsr.getOsContext().getDeviceBitfield();
auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, true); auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, true);
if (walkerArgs.currentTimestampPacketNodes && DebugManager.flags.PrintTimestampPacketUsage.get() == 1) { if (walkerArgs.currentTimestampPacketNodes && walkerArgs.currentTimestampPacketNodes->peekNodes().size() > 0 &&
DebugManager.flags.PrintTimestampPacketUsage.get() == 1) {
auto gpuVa = walkerArgs.currentTimestampPacketNodes->peekNodes()[walkerArgs.currentDispatchIndex]->getGpuAddress(); auto gpuVa = walkerArgs.currentTimestampPacketNodes->peekNodes()[walkerArgs.currentDispatchIndex]->getGpuAddress();
printf("\nPID:%u, TSP used for Walker: 0x%" PRIX64 ", cmdBuffer pos: 0x%" PRIX64, SysCalls::getProcessId(), gpuVa, commandStream.getCurrentGpuAddressPosition()); printf("\nPID:%u, TSP used for Walker: 0x%" PRIX64 ", cmdBuffer pos: 0x%" PRIX64, SysCalls::getProcessId(), gpuVa, commandStream.getCurrentGpuAddressPosition());
} }

View File

@@ -451,6 +451,19 @@ inline WaitStatus Event::wait(bool blocking, bool useQuickKmdSleep) {
DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0); DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0);
{
TakeOwnershipWrapper<CommandQueue> queueOwnership(*cmdQueue);
bool releaseNodes = (taskCount == cmdQueue->peekTaskCount());
if (bcsState.isValid()) {
releaseNodes &= (bcsState.taskCount == cmdQueue->peekBcsTaskCount(bcsState.engineType));
}
if (releaseNodes) {
cmdQueue->releaseDeferredNodes();
}
}
auto *allocationStorage = cmdQueue->getGpgpuCommandStreamReceiver().getInternalAllocationStorage(); auto *allocationStorage = cmdQueue->getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
allocationStorage->cleanAllocationList(this->taskCount, TEMPORARY_ALLOCATION); allocationStorage->cleanAllocationList(this->taskCount, TEMPORARY_ALLOCATION);

View File

@@ -756,7 +756,7 @@ HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitForQueuesWhenFinishThenWa
EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 0u); EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 0u);
} }
HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitForQueuesWhenFinishThenWaitOnTimestamp) { HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitForQueuesWhenFinishThenDontWaitOnTimestamp) {
DebugManagerStateRestore restorer; DebugManagerStateRestore restorer;
DebugManager.flags.UpdateTaskCountFromWait.set(3); DebugManager.flags.UpdateTaskCountFromWait.set(3);
DebugManager.flags.EnableTimestampWaitForQueues.set(1); DebugManager.flags.EnableTimestampWaitForQueues.set(1);
@@ -774,19 +774,83 @@ HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitForQueuesWhenFinish
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
cmdQ->flush(); cmdQ->flush();
EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size());
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size()); EXPECT_EQ(0u, timestampPacketContainer->peekNodes().size());
ASSERT_GT(deferredTimestampPackets->peekNodes().size(), 0u);
ASSERT_GT(timestampPacketContainer->peekNodes().size(), 0u);
typename FamilyType::TimestampPacketType timestampData[] = {2, 2, 2, 2};
for (uint32_t i = 0; i < deferredTimestampPackets->peekNodes()[0]->getPacketsUsed(); i++) {
deferredTimestampPackets->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData);
timestampPacketContainer->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData);
}
cmdQ->finish(); cmdQ->finish();
EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 0u); EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 1u);
cmdQ.reset();
}
// Checks node bookkeeping on an out-of-order queue: an enqueue that returns an event keeps one
// node in timestampPacketContainer, while a following enqueue without an event leaves
// timestampPacketContainer empty and the earlier node in deferredTimestampPackets.
HWTEST_F(TimestampPacketTests, givenOOQAndWithoutEventWhenEnqueueCalledThenMoveCurrentNodeToDeferredContainer) {
DebugManagerStateRestore restorer;
DebugManager.flags.UpdateTaskCountFromWait.set(3);
DebugManager.flags.EnableTimestampWaitForQueues.set(1);
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = true;
// NOTE(review): presumably stops the mock CSR from running the real wait - confirm against the mock.
csr.callBaseWaitForCompletionWithTimeout = false;
// Queue is created with the out-of-order execution property.
cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, device.get(), props);
TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get();
TimestampPacketContainer *timestampPacketContainer = cmdQ->timestampPacketContainer.get();
cl_event event;
// First enqueue requests an output event: one node in the current container, nothing deferred.
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event);
EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size());
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());
// Second enqueue passes no event: the previous node ends up deferred and no new node is held.
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
cmdQ->flush();
EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size());
EXPECT_EQ(0u, timestampPacketContainer->peekNodes().size());
cmdQ->finish();
// The event obtained from the first enqueue is released before the queue is destroyed.
clReleaseEvent(event);
cmdQ.reset();
}
HWTEST_F(TimestampPacketTests, givenEventWithLatestTaskCountWhenWaitCalledThenClearDeferredNodes) {
DebugManagerStateRestore restorer;
DebugManager.flags.UpdateTaskCountFromWait.set(3);
DebugManager.flags.EnableTimestampWaitForQueues.set(1);
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = true;
csr.callBaseWaitForCompletionWithTimeout = false;
cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, device.get(), props);
TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get();
TimestampPacketContainer *timestampPacketContainer = cmdQ->timestampPacketContainer.get();
cl_event event1, event2;
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event1);
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event2);
cmdQ->flush();
EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size());
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());
castToObjectOrAbort<Event>(event1)->wait(false, false);
EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size());
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());
castToObjectOrAbort<Event>(event2)->wait(false, false);
EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size());
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());
clReleaseEvent(event1);
clReleaseEvent(event2);
cmdQ.reset(); cmdQ.reset();
} }
@@ -815,30 +879,28 @@ HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitForQueuesWhenFinishThenCa
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
cmdQ->flush(); cmdQ->flush();
EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size());
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size()); EXPECT_EQ(0u, timestampPacketContainer->peekNodes().size());
VariableBackup<volatile TagAddressType *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); VariableBackup<volatile TagAddressType *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
VariableBackup<TaskCountType> backupPauseValue(&CpuIntrinsicsTests::pauseValue); VariableBackup<TaskCountType> backupPauseValue(&CpuIntrinsicsTests::pauseValue);
VariableBackup<uint32_t> backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); VariableBackup<uint32_t> backupPauseOffset(&CpuIntrinsicsTests::pauseOffset);
VariableBackup<std::function<void()>> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress); VariableBackup<std::function<void()>> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress);
ASSERT_GT(deferredTimestampPackets->peekNodes().size(), 0u); auto &csr = cmdQ->getGpgpuCommandStreamReceiver();
ASSERT_GT(timestampPacketContainer->peekNodes().size(), 0u); *csr.getTagAddress() = 0;
deferredTimestampPackets->peekNodes()[0]->setPacketsUsed(1u);
timestampPacketContainer->peekNodes()[0]->setPacketsUsed(1u);
CpuIntrinsicsTests::pauseAddress = reinterpret_cast<volatile TagAddressType *>(const_cast<void *>(timestampPacketContainer->peekNodes()[0]->getContextEndAddress(0u))); CpuIntrinsicsTests::pauseAddress = csr.getTagAddress();
CpuIntrinsicsTests::pauseValue = 2u; CpuIntrinsicsTests::pauseValue = 3u;
CpuIntrinsicsTests::setupPauseAddress = [&]() { CpuIntrinsicsTests::setupPauseAddress = [&]() {
CpuIntrinsicsTests::pauseAddress = reinterpret_cast<volatile TagAddressType *>(const_cast<void *>(deferredTimestampPackets->peekNodes()[0]->getContextEndAddress(0u))); CpuIntrinsicsTests::pauseAddress = csr.getTagAddress();
}; };
CpuIntrinsicsTests::pauseCounter = 0u; CpuIntrinsicsTests::pauseCounter = 0u;
EXPECT_FALSE(device->getUltCommandStreamReceiver<FamilyType>().downloadAllocationCalled); EXPECT_FALSE(device->getUltCommandStreamReceiver<FamilyType>().downloadAllocationCalled);
cmdQ->finish(); cmdQ->finish();
EXPECT_EQ(2u, CpuIntrinsicsTests::pauseCounter); EXPECT_EQ(1u, CpuIntrinsicsTests::pauseCounter);
EXPECT_TRUE(device->getUltCommandStreamReceiver<FamilyType>().downloadAllocationCalled); EXPECT_TRUE(device->getUltCommandStreamReceiver<FamilyType>().downloadAllocationCalled);
cmdQ.reset(); cmdQ.reset();
@@ -852,12 +914,16 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingToO
TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get(); TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get();
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); cl_event event;
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event);
EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size());
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size());
clReleaseEvent(event);
} }
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlockingThenTrackOwnershipUntilQueueIsCompleted) { HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlockingThenTrackOwnershipUntilQueueIsCompleted) {

View File

@@ -480,6 +480,62 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockedBlitEnqueueWhenUnblockingThenMake
EXPECT_TRUE(bcsCsr->isMadeResident(eventDependency->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), bcsCsr->taskCount)); EXPECT_TRUE(bcsCsr->isMadeResident(eventDependency->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), bcsCsr->taskCount));
} }
// Checks when Event::wait empties the queue's deferred timestamp packet container for blit
// enqueues: the expectations below show it stays untouched while the event's task counts differ
// from the values set on the queue, and is emptied once an event carries the latest gpgpu and
// bcs task counts.
HWTEST_TEMPLATED_F(BcsBufferTests, givenEventWithLatestTaskCountWhenWaitCalledThenClearDeferredNodes) {
auto mockCmdQ = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
auto bufferForBlt = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
bufferForBlt->forceDisallowCPUCopy = true;
TimestampPacketContainer *deferredTimestampPackets = mockCmdQ->deferredTimestampPackets.get();
TimestampPacketContainer *timestampPacketContainer = mockCmdQ->timestampPacketContainer.get();
cl_event event1, event2;
// Two non-blocking reads, each returning an event.
mockCmdQ->enqueueReadBuffer(bufferForBlt.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, &event1);
mockCmdQ->enqueueReadBuffer(bufferForBlt.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, &event2);
// Bump the queue's task count by hand so neither event matches it in the first phase below.
mockCmdQ->taskCount++;
auto event1Obj = castToObjectOrAbort<Event>(event1);
auto event2Obj = castToObjectOrAbort<Event>(event2);
// One deferred node is expected, plus two more when a cache flush for BCS is required.
size_t expectedSize = 1;
if (mockCmdQ->isCacheFlushForBcsRequired()) {
expectedSize += 2;
}
EXPECT_EQ(expectedSize, deferredTimestampPackets->peekNodes().size());
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());
// gpgpu task count not equal
{
event1Obj->wait(false, false);
EXPECT_EQ(expectedSize, deferredTimestampPackets->peekNodes().size());
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());
event2Obj->wait(false, false);
EXPECT_EQ(expectedSize, deferredTimestampPackets->peekNodes().size());
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());
}
// event1: latest gpgpu task count, bcs task count one behind. event2: both latest.
// NOTE(review): the second call reads the bcs value through event1Obj; presumably both events
// report the same queue-level value - confirm.
event1Obj->updateTaskCount(mockCmdQ->taskCount, event1Obj->peekBcsTaskCountFromCommandQueue() - 1);
event2Obj->updateTaskCount(mockCmdQ->taskCount, event1Obj->peekBcsTaskCountFromCommandQueue());
// gpgpu and bcs task count equal
{
// event1 still has a stale bcs task count, so the deferred nodes stay.
event1Obj->wait(false, false);
EXPECT_EQ(expectedSize, deferredTimestampPackets->peekNodes().size());
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());
// event2 matches on both counts: deferred nodes are gone, the current node is kept.
event2Obj->wait(false, false);
EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size());
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());
}
clReleaseEvent(event1);
clReleaseEvent(event2);
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenMapAllocationWhenEnqueueingReadOrWriteBufferThenStoreMapAllocationInDispatchParameters) { HWTEST_TEMPLATED_F(BcsBufferTests, givenMapAllocationWhenEnqueueingReadOrWriteBufferThenStoreMapAllocationInDispatchParameters) {
DebugManager.flags.DisableZeroCopyForBuffers.set(true); DebugManager.flags.DisableZeroCopyForBuffers.set(true);
auto mockCmdQ = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get()); auto mockCmdQ = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());