mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-23 20:12:03 +08:00
fix: dont allocate TSP for OOQ without Event
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c892b8c6f3
commit
d96cf5846a
@@ -1281,17 +1281,10 @@ WaitStatus CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *pri
|
|||||||
return WaitStatus::GpuHang;
|
return WaitStatus::GpuHang;
|
||||||
}
|
}
|
||||||
|
|
||||||
TimestampPacketContainer nodesToRelease;
|
|
||||||
if (deferredTimestampPackets) {
|
|
||||||
deferredTimestampPackets->swapNodes(nodesToRelease);
|
|
||||||
}
|
|
||||||
TimestampPacketContainer multiRootSyncNodesToRelease;
|
|
||||||
if (deferredMultiRootSyncNodes.get()) {
|
|
||||||
deferredMultiRootSyncNodes->swapNodes(multiRootSyncNodesToRelease);
|
|
||||||
}
|
|
||||||
|
|
||||||
waitStatus = waitUntilComplete(taskCount, activeBcsStates, flushStamp->peekStamp(), false, cleanTemporaryAllocationsList, waitedOnTimestamps);
|
waitStatus = waitUntilComplete(taskCount, activeBcsStates, flushStamp->peekStamp(), false, cleanTemporaryAllocationsList, waitedOnTimestamps);
|
||||||
|
|
||||||
|
releaseDeferredNodes();
|
||||||
|
|
||||||
if (printfHandler) {
|
if (printfHandler) {
|
||||||
if (!printfHandler->printEnqueueOutput()) {
|
if (!printfHandler->printEnqueueOutput()) {
|
||||||
return WaitStatus::GpuHang;
|
return WaitStatus::GpuHang;
|
||||||
@@ -1378,4 +1371,15 @@ bool CommandQueue::migrateMultiGraphicsAllocationsIfRequired(const BuiltinOpPara
|
|||||||
return migrationHandled;
|
return migrationHandled;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CommandQueue::releaseDeferredNodes() {
|
||||||
|
TimestampPacketContainer nodesToRelease;
|
||||||
|
if (deferredTimestampPackets) {
|
||||||
|
deferredTimestampPackets->swapNodes(nodesToRelease);
|
||||||
|
}
|
||||||
|
TimestampPacketContainer multiRootSyncNodesToRelease;
|
||||||
|
if (deferredMultiRootSyncNodes.get()) {
|
||||||
|
deferredMultiRootSyncNodes->swapNodes(multiRootSyncNodesToRelease);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -374,6 +374,10 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
|||||||
|
|
||||||
const std::array<CopyEngineState, bcsInfoMaskSize> &peekActiveBcsStates() const { return bcsStates; }
|
const std::array<CopyEngineState, bcsInfoMaskSize> &peekActiveBcsStates() const { return bcsStates; }
|
||||||
|
|
||||||
|
void releaseDeferredNodes();
|
||||||
|
|
||||||
|
TaskCountType peekTaskCount() const { return taskCount; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
||||||
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);
|
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);
|
||||||
|
|||||||
@@ -171,11 +171,13 @@ bool CommandQueueHw<Family>::waitForTimestamps(Range<CopyEngineState> copyEngine
|
|||||||
using TSPacketType = typename Family::TimestampPacketType;
|
using TSPacketType = typename Family::TimestampPacketType;
|
||||||
bool waited = false;
|
bool waited = false;
|
||||||
|
|
||||||
|
if (isOOQEnabled()) {
|
||||||
|
// TSP for OOQ dispatch is optional. We need to wait for task count.
|
||||||
|
return waited;
|
||||||
|
}
|
||||||
|
|
||||||
if (isWaitForTimestampsEnabled()) {
|
if (isWaitForTimestampsEnabled()) {
|
||||||
waited = waitForTimestampsWithinContainer<TSPacketType>(mainContainer, getGpgpuCommandStreamReceiver(), status);
|
waited = waitForTimestampsWithinContainer<TSPacketType>(mainContainer, getGpgpuCommandStreamReceiver(), status);
|
||||||
if (isOOQEnabled()) {
|
|
||||||
waitForTimestampsWithinContainer<TSPacketType>(deferredContainer, getGpgpuCommandStreamReceiver(), status);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (waited) {
|
if (waited) {
|
||||||
getGpgpuCommandStreamReceiver().downloadAllocations();
|
getGpgpuCommandStreamReceiver().downloadAllocations();
|
||||||
|
|||||||
@@ -211,7 +211,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||||||
if (isCacheFlushCommand(commandType) || isMarkerWithPostSyncWrite || isNonStallingIoqBarrierWithDependencies) {
|
if (isCacheFlushCommand(commandType) || isMarkerWithPostSyncWrite || isNonStallingIoqBarrierWithDependencies) {
|
||||||
nodesCount = 1;
|
nodesCount = 1;
|
||||||
} else if (!multiDispatchInfo.empty()) {
|
} else if (!multiDispatchInfo.empty()) {
|
||||||
nodesCount = estimateTimestampPacketNodesCount(multiDispatchInfo);
|
if (isOOQEnabled() && !event) {
|
||||||
|
// TSP not needed. Release current node.
|
||||||
|
timestampPacketContainer->moveNodesToNewContainer(*deferredTimestampPackets);
|
||||||
|
} else {
|
||||||
|
nodesCount = estimateTimestampPacketNodesCount(multiDispatchInfo);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isCacheFlushForBcsRequired() && enqueueWithBlitAuxTranslation) {
|
if (isCacheFlushForBcsRequired() && enqueueWithBlitAuxTranslation) {
|
||||||
|
|||||||
@@ -68,7 +68,8 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
|||||||
size_t numWorkGroups[3] = {walkerArgs.numberOfWorkgroups->x, walkerArgs.numberOfWorkgroups->y, walkerArgs.numberOfWorkgroups->z};
|
size_t numWorkGroups[3] = {walkerArgs.numberOfWorkgroups->x, walkerArgs.numberOfWorkgroups->y, walkerArgs.numberOfWorkgroups->z};
|
||||||
auto threadGroupCount = static_cast<uint32_t>(walkerArgs.numberOfWorkgroups->x * walkerArgs.numberOfWorkgroups->y * walkerArgs.numberOfWorkgroups->z);
|
auto threadGroupCount = static_cast<uint32_t>(walkerArgs.numberOfWorkgroups->x * walkerArgs.numberOfWorkgroups->y * walkerArgs.numberOfWorkgroups->z);
|
||||||
|
|
||||||
if (walkerArgs.currentTimestampPacketNodes && commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
if (walkerArgs.currentTimestampPacketNodes && walkerArgs.currentTimestampPacketNodes->peekNodes().size() > 0 &&
|
||||||
|
commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||||
auto timestampPacketNode = walkerArgs.currentTimestampPacketNodes->peekNodes().at(walkerArgs.currentDispatchIndex);
|
auto timestampPacketNode = walkerArgs.currentTimestampPacketNodes->peekNodes().at(walkerArgs.currentDispatchIndex);
|
||||||
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(&commandStream, &walkerCmd, timestampPacketNode, rootDeviceEnvironment);
|
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(&commandStream, &walkerCmd, timestampPacketNode, rootDeviceEnvironment);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -79,7 +79,8 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
|||||||
auto &queueCsr = commandQueue.getGpgpuCommandStreamReceiver();
|
auto &queueCsr = commandQueue.getGpgpuCommandStreamReceiver();
|
||||||
|
|
||||||
auto &rootDeviceEnvironment = commandQueue.getDevice().getRootDeviceEnvironment();
|
auto &rootDeviceEnvironment = commandQueue.getDevice().getRootDeviceEnvironment();
|
||||||
if (walkerArgs.currentTimestampPacketNodes && queueCsr.peekTimestampPacketWriteEnabled()) {
|
|
||||||
|
if (walkerArgs.currentTimestampPacketNodes && (walkerArgs.currentTimestampPacketNodes->peekNodes().size() > 0)) {
|
||||||
auto timestampPacket = walkerArgs.currentTimestampPacketNodes->peekNodes().at(walkerArgs.currentDispatchIndex);
|
auto timestampPacket = walkerArgs.currentTimestampPacketNodes->peekNodes().at(walkerArgs.currentDispatchIndex);
|
||||||
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(&commandStream, &walkerCmd, timestampPacket, rootDeviceEnvironment);
|
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(&commandStream, &walkerCmd, timestampPacket, rootDeviceEnvironment);
|
||||||
}
|
}
|
||||||
@@ -125,7 +126,8 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
|||||||
auto devices = queueCsr.getOsContext().getDeviceBitfield();
|
auto devices = queueCsr.getOsContext().getDeviceBitfield();
|
||||||
auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, true);
|
auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, true);
|
||||||
|
|
||||||
if (walkerArgs.currentTimestampPacketNodes && DebugManager.flags.PrintTimestampPacketUsage.get() == 1) {
|
if (walkerArgs.currentTimestampPacketNodes && walkerArgs.currentTimestampPacketNodes->peekNodes().size() > 0 &&
|
||||||
|
DebugManager.flags.PrintTimestampPacketUsage.get() == 1) {
|
||||||
auto gpuVa = walkerArgs.currentTimestampPacketNodes->peekNodes()[walkerArgs.currentDispatchIndex]->getGpuAddress();
|
auto gpuVa = walkerArgs.currentTimestampPacketNodes->peekNodes()[walkerArgs.currentDispatchIndex]->getGpuAddress();
|
||||||
printf("\nPID:%u, TSP used for Walker: 0x%" PRIX64 ", cmdBuffer pos: 0x%" PRIX64, SysCalls::getProcessId(), gpuVa, commandStream.getCurrentGpuAddressPosition());
|
printf("\nPID:%u, TSP used for Walker: 0x%" PRIX64 ", cmdBuffer pos: 0x%" PRIX64, SysCalls::getProcessId(), gpuVa, commandStream.getCurrentGpuAddressPosition());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -451,6 +451,19 @@ inline WaitStatus Event::wait(bool blocking, bool useQuickKmdSleep) {
|
|||||||
|
|
||||||
DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0);
|
DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0);
|
||||||
|
|
||||||
|
{
|
||||||
|
TakeOwnershipWrapper<CommandQueue> queueOwnership(*cmdQueue);
|
||||||
|
|
||||||
|
bool releaseNodes = (taskCount == cmdQueue->peekTaskCount());
|
||||||
|
if (bcsState.isValid()) {
|
||||||
|
releaseNodes &= (bcsState.taskCount == cmdQueue->peekBcsTaskCount(bcsState.engineType));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (releaseNodes) {
|
||||||
|
cmdQueue->releaseDeferredNodes();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
auto *allocationStorage = cmdQueue->getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
|
auto *allocationStorage = cmdQueue->getGpgpuCommandStreamReceiver().getInternalAllocationStorage();
|
||||||
allocationStorage->cleanAllocationList(this->taskCount, TEMPORARY_ALLOCATION);
|
allocationStorage->cleanAllocationList(this->taskCount, TEMPORARY_ALLOCATION);
|
||||||
|
|
||||||
|
|||||||
@@ -756,7 +756,7 @@ HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitForQueuesWhenFinishThenWa
|
|||||||
EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 0u);
|
EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 0u);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitForQueuesWhenFinishThenWaitOnTimestamp) {
|
HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitForQueuesWhenFinishThenDontWaitOnTimestamp) {
|
||||||
DebugManagerStateRestore restorer;
|
DebugManagerStateRestore restorer;
|
||||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||||
DebugManager.flags.EnableTimestampWaitForQueues.set(1);
|
DebugManager.flags.EnableTimestampWaitForQueues.set(1);
|
||||||
@@ -774,19 +774,83 @@ HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitForQueuesWhenFinish
|
|||||||
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||||
cmdQ->flush();
|
cmdQ->flush();
|
||||||
|
|
||||||
EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size());
|
EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size());
|
||||||
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());
|
EXPECT_EQ(0u, timestampPacketContainer->peekNodes().size());
|
||||||
ASSERT_GT(deferredTimestampPackets->peekNodes().size(), 0u);
|
|
||||||
ASSERT_GT(timestampPacketContainer->peekNodes().size(), 0u);
|
|
||||||
typename FamilyType::TimestampPacketType timestampData[] = {2, 2, 2, 2};
|
|
||||||
for (uint32_t i = 0; i < deferredTimestampPackets->peekNodes()[0]->getPacketsUsed(); i++) {
|
|
||||||
deferredTimestampPackets->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData);
|
|
||||||
timestampPacketContainer->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData);
|
|
||||||
}
|
|
||||||
|
|
||||||
cmdQ->finish();
|
cmdQ->finish();
|
||||||
|
|
||||||
EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 0u);
|
EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 1u);
|
||||||
|
|
||||||
|
cmdQ.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
// On an out-of-order queue, an enqueue that returns an event keeps its node in the
// current timestamp packet container, while a following enqueue without an event
// moves that node to the deferred container and leaves the current container empty.
HWTEST_F(TimestampPacketTests, givenOOQAndWithoutEventWhenEnqueueCalledThenMoveCurrentNodeToDeferredContainer) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.UpdateTaskCountFromWait.set(3);
    DebugManager.flags.EnableTimestampWaitForQueues.set(1);

    auto &ultCsr = device->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.timestampPacketWriteEnabled = true;
    ultCsr.callBaseWaitForCompletionWithTimeout = false;

    cl_queue_properties oooQueueProperties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
    auto oooQueue = std::make_unique<MockCommandQueueHw<FamilyType>>(context, device.get(), oooQueueProperties);

    auto *deferredContainer = oooQueue->deferredTimestampPackets.get();
    auto *currentContainer = oooQueue->timestampPacketContainer.get();

    cl_event returnedEvent;

    // Enqueue with an output event: the node stays in the current container.
    oooQueue->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &returnedEvent);

    EXPECT_EQ(0u, deferredContainer->peekNodes().size());
    EXPECT_EQ(1u, currentContainer->peekNodes().size());

    // Enqueue without an output event: the previous node ends up deferred and no new one is kept.
    oooQueue->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
    oooQueue->flush();

    EXPECT_EQ(1u, deferredContainer->peekNodes().size());
    EXPECT_EQ(0u, currentContainer->peekNodes().size());

    oooQueue->finish();

    clReleaseEvent(returnedEvent);

    oooQueue.reset();
}
|
||||||
|
|
||||||
|
HWTEST_F(TimestampPacketTests, givenEventWithLatestTaskCountWhenWaitCalledThenClearDeferredNodes) {
|
||||||
|
DebugManagerStateRestore restorer;
|
||||||
|
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||||
|
DebugManager.flags.EnableTimestampWaitForQueues.set(1);
|
||||||
|
|
||||||
|
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
csr.timestampPacketWriteEnabled = true;
|
||||||
|
csr.callBaseWaitForCompletionWithTimeout = false;
|
||||||
|
cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
|
||||||
|
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, device.get(), props);
|
||||||
|
|
||||||
|
TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get();
|
||||||
|
TimestampPacketContainer *timestampPacketContainer = cmdQ->timestampPacketContainer.get();
|
||||||
|
|
||||||
|
cl_event event1, event2;
|
||||||
|
|
||||||
|
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event1);
|
||||||
|
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event2);
|
||||||
|
cmdQ->flush();
|
||||||
|
|
||||||
|
EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size());
|
||||||
|
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());
|
||||||
|
|
||||||
|
castToObjectOrAbort<Event>(event1)->wait(false, false);
|
||||||
|
EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size());
|
||||||
|
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());
|
||||||
|
|
||||||
|
castToObjectOrAbort<Event>(event2)->wait(false, false);
|
||||||
|
EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size());
|
||||||
|
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());
|
||||||
|
|
||||||
|
clReleaseEvent(event1);
|
||||||
|
clReleaseEvent(event2);
|
||||||
|
|
||||||
cmdQ.reset();
|
cmdQ.reset();
|
||||||
}
|
}
|
||||||
@@ -815,30 +879,28 @@ HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitForQueuesWhenFinishThenCa
|
|||||||
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||||
cmdQ->flush();
|
cmdQ->flush();
|
||||||
|
|
||||||
EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size());
|
EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size());
|
||||||
EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size());
|
EXPECT_EQ(0u, timestampPacketContainer->peekNodes().size());
|
||||||
|
|
||||||
VariableBackup<volatile TagAddressType *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
|
VariableBackup<volatile TagAddressType *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress);
|
||||||
VariableBackup<TaskCountType> backupPauseValue(&CpuIntrinsicsTests::pauseValue);
|
VariableBackup<TaskCountType> backupPauseValue(&CpuIntrinsicsTests::pauseValue);
|
||||||
VariableBackup<uint32_t> backupPauseOffset(&CpuIntrinsicsTests::pauseOffset);
|
VariableBackup<uint32_t> backupPauseOffset(&CpuIntrinsicsTests::pauseOffset);
|
||||||
VariableBackup<std::function<void()>> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress);
|
VariableBackup<std::function<void()>> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress);
|
||||||
|
|
||||||
ASSERT_GT(deferredTimestampPackets->peekNodes().size(), 0u);
|
auto &csr = cmdQ->getGpgpuCommandStreamReceiver();
|
||||||
ASSERT_GT(timestampPacketContainer->peekNodes().size(), 0u);
|
*csr.getTagAddress() = 0;
|
||||||
deferredTimestampPackets->peekNodes()[0]->setPacketsUsed(1u);
|
|
||||||
timestampPacketContainer->peekNodes()[0]->setPacketsUsed(1u);
|
|
||||||
|
|
||||||
CpuIntrinsicsTests::pauseAddress = reinterpret_cast<volatile TagAddressType *>(const_cast<void *>(timestampPacketContainer->peekNodes()[0]->getContextEndAddress(0u)));
|
CpuIntrinsicsTests::pauseAddress = csr.getTagAddress();
|
||||||
CpuIntrinsicsTests::pauseValue = 2u;
|
CpuIntrinsicsTests::pauseValue = 3u;
|
||||||
CpuIntrinsicsTests::setupPauseAddress = [&]() {
|
CpuIntrinsicsTests::setupPauseAddress = [&]() {
|
||||||
CpuIntrinsicsTests::pauseAddress = reinterpret_cast<volatile TagAddressType *>(const_cast<void *>(deferredTimestampPackets->peekNodes()[0]->getContextEndAddress(0u)));
|
CpuIntrinsicsTests::pauseAddress = csr.getTagAddress();
|
||||||
};
|
};
|
||||||
CpuIntrinsicsTests::pauseCounter = 0u;
|
CpuIntrinsicsTests::pauseCounter = 0u;
|
||||||
EXPECT_FALSE(device->getUltCommandStreamReceiver<FamilyType>().downloadAllocationCalled);
|
EXPECT_FALSE(device->getUltCommandStreamReceiver<FamilyType>().downloadAllocationCalled);
|
||||||
|
|
||||||
cmdQ->finish();
|
cmdQ->finish();
|
||||||
|
|
||||||
EXPECT_EQ(2u, CpuIntrinsicsTests::pauseCounter);
|
EXPECT_EQ(1u, CpuIntrinsicsTests::pauseCounter);
|
||||||
EXPECT_TRUE(device->getUltCommandStreamReceiver<FamilyType>().downloadAllocationCalled);
|
EXPECT_TRUE(device->getUltCommandStreamReceiver<FamilyType>().downloadAllocationCalled);
|
||||||
|
|
||||||
cmdQ.reset();
|
cmdQ.reset();
|
||||||
@@ -852,12 +914,16 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingToO
|
|||||||
|
|
||||||
TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get();
|
TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get();
|
||||||
|
|
||||||
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
cl_event event;
|
||||||
|
|
||||||
|
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event);
|
||||||
EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size());
|
EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size());
|
||||||
|
|
||||||
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||||
|
|
||||||
EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size());
|
EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size());
|
||||||
|
|
||||||
|
clReleaseEvent(event);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlockingThenTrackOwnershipUntilQueueIsCompleted) {
|
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlockingThenTrackOwnershipUntilQueueIsCompleted) {
|
||||||
|
|||||||
@@ -480,6 +480,62 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockedBlitEnqueueWhenUnblockingThenMake
|
|||||||
EXPECT_TRUE(bcsCsr->isMadeResident(eventDependency->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), bcsCsr->taskCount));
|
EXPECT_TRUE(bcsCsr->isMadeResident(eventDependency->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), bcsCsr->taskCount));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Waiting on an event clears the queue's deferred nodes only when the event's task
// counts match the queue's current gpgpu task count and bcs task count.
HWTEST_TEMPLATED_F(BcsBufferTests, givenEventWithLatestTaskCountWhenWaitCalledThenClearDeferredNodes) {
    auto queue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());

    auto bltBuffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    bltBuffer->forceDisallowCPUCopy = true;

    auto *deferredContainer = queue->deferredTimestampPackets.get();
    auto *currentContainer = queue->timestampPacketContainer.get();

    cl_event firstHandle;
    cl_event secondHandle;

    queue->enqueueReadBuffer(bltBuffer.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, &firstHandle);
    queue->enqueueReadBuffer(bltBuffer.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, &secondHandle);

    // Advance the queue's task count so that neither event holds the latest value.
    queue->taskCount++;

    auto firstEvent = castToObjectOrAbort<Event>(firstHandle);
    auto secondEvent = castToObjectOrAbort<Event>(secondHandle);

    // One deferred node is expected, plus two more when the queue requires a cache flush for BCS.
    const size_t expectedDeferredCount = queue->isCacheFlushForBcsRequired() ? 3u : 1u;

    EXPECT_EQ(expectedDeferredCount, deferredContainer->peekNodes().size());
    EXPECT_EQ(1u, currentContainer->peekNodes().size());

    // gpgpu task count not equal: neither wait may touch the deferred nodes.
    firstEvent->wait(false, false);
    EXPECT_EQ(expectedDeferredCount, deferredContainer->peekNodes().size());
    EXPECT_EQ(1u, currentContainer->peekNodes().size());

    secondEvent->wait(false, false);
    EXPECT_EQ(expectedDeferredCount, deferredContainer->peekNodes().size());
    EXPECT_EQ(1u, currentContainer->peekNodes().size());

    // Give both events the queue's gpgpu task count; the first event gets a bcs task
    // count one below the value read from the queue, the second gets that value itself.
    // Both bcs values are read through the first event object.
    firstEvent->updateTaskCount(queue->taskCount, firstEvent->peekBcsTaskCountFromCommandQueue() - 1);
    secondEvent->updateTaskCount(queue->taskCount, firstEvent->peekBcsTaskCountFromCommandQueue());

    // gpgpu and bcs task count equal only for the second event: only its wait clears the deferred nodes.
    firstEvent->wait(false, false);
    EXPECT_EQ(expectedDeferredCount, deferredContainer->peekNodes().size());
    EXPECT_EQ(1u, currentContainer->peekNodes().size());

    secondEvent->wait(false, false);
    EXPECT_EQ(0u, deferredContainer->peekNodes().size());
    EXPECT_EQ(1u, currentContainer->peekNodes().size());

    clReleaseEvent(firstHandle);
    clReleaseEvent(secondHandle);
}
|
||||||
|
|
||||||
HWTEST_TEMPLATED_F(BcsBufferTests, givenMapAllocationWhenEnqueueingReadOrWriteBufferThenStoreMapAllocationInDispatchParameters) {
|
HWTEST_TEMPLATED_F(BcsBufferTests, givenMapAllocationWhenEnqueueingReadOrWriteBufferThenStoreMapAllocationInDispatchParameters) {
|
||||||
DebugManager.flags.DisableZeroCopyForBuffers.set(true);
|
DebugManager.flags.DisableZeroCopyForBuffers.set(true);
|
||||||
auto mockCmdQ = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
|
auto mockCmdQ = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
|
||||||
|
|||||||
Reference in New Issue
Block a user