fix: cache flush dependency for queue blocked

Related-to: NEO-9872, HSD-18038461954
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka 2024-05-20 10:39:06 +00:00 committed by Compute-Runtime-Automation
parent e01d34741d
commit 90df4b298b
12 changed files with 215 additions and 28 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -9,6 +9,7 @@
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/bcs_ccs_dependency_pair_container.h"
#include "shared/source/helpers/engine_control.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/os_interface/os_context.h"
@ -401,7 +402,8 @@ class CommandQueueHw : public CommandQueue {
EventBuilder &externalEventBuilder,
std::unique_ptr<PrintfHandler> &&printfHandler,
CommandStreamReceiver *bcsCsr,
TagNodeBase *multiRootDeviceSyncNode);
TagNodeBase *multiRootDeviceSyncNode,
CsrDependencyContainer *csrDependencies);
CompletionStamp enqueueCommandWithoutKernel(Surface **surfaces,
size_t surfaceCount,
@ -449,6 +451,7 @@ class CommandQueueHw : public CommandQueue {
protected:
MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo){};
MOCKABLE_VIRTUAL bool prepareCsrDependency(CsrDependencies &csrDeps, CsrDependencyContainer &dependencyTags, TimestampPacketDependencies &timestampPacketDependencies, TagAllocatorBase *allocator, bool blockQueue);
size_t calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image);
cl_int enqueueReadWriteBufferOnCpuWithMemoryTransfer(cl_command_type commandType, Buffer *buffer,

View File

@ -9,6 +9,7 @@
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/wait_status.h"
#include "shared/source/direct_submission/relaxed_ordering_helper.h"
#include "shared/source/helpers/bcs_ccs_dependency_pair_container.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/flat_batch_buffer_helper.h"
#include "shared/source/helpers/flush_stamp.h"
@ -457,7 +458,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
eventBuilder,
std::move(printfHandler),
nullptr,
multiRootEventSyncStamp);
multiRootEventSyncStamp,
nullptr);
}
if (deferredTimestampPackets.get()) {
@ -994,7 +996,8 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
EventBuilder &externalEventBuilder,
std::unique_ptr<PrintfHandler> &&printfHandler,
CommandStreamReceiver *bcsCsr,
TagNodeBase *multiRootDeviceSyncNode) {
TagNodeBase *multiRootDeviceSyncNode,
CsrDependencyContainer *dependencyTags) {
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
@ -1033,9 +1036,8 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
storeTimestampPackets = (timestampPacketContainer != nullptr);
}
if (enqueueProperties.operation != EnqueueProperties::Operation::gpuKernel) {
command = std::make_unique<CommandWithoutKernel>(*this, blockedCommandsData);
command = std::make_unique<CommandWithoutKernel>(*this, blockedCommandsData, dependencyTags);
} else {
// store task data in event
std::vector<Surface *> allSurfaces;
@ -1244,6 +1246,23 @@ size_t CommandQueueHw<GfxFamily>::calculateHostPtrSizeForImage(const size_t *reg
return Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, image->getImageDesc().image_type);
}
// Allocates one tag per CSR listed in csrDeps.csrWithMultiEngineDependencies and
// registers it in timestampPacketDependencies.multiCsrDependencies.
// - blockQueue == false: the dependency update is submitted to the CSR right away;
//   the first failing submission aborts the loop and false is returned.
// - blockQueue == true: nothing is submitted here; the (CSR, tag) pair is appended
//   to dependencyTags instead.
// Returns true when every processed CSR was handled without a failed submission.
template <typename GfxFamily>
bool CommandQueueHw<GfxFamily>::prepareCsrDependency(CsrDependencies &csrDeps, CsrDependencyContainer &dependencyTags, TimestampPacketDependencies &timestampPacketDependencies, TagAllocatorBase *allocator, bool blockQueue) {
    for (auto &csr : csrDeps.csrWithMultiEngineDependencies) {
        auto node = allocator->getTag();
        timestampPacketDependencies.multiCsrDependencies.add(node);

        if (blockQueue) {
            // Defer the submission: remember which CSR has to signal which tag.
            dependencyTags.emplace_back(csr, node);
            continue;
        }

        if (!csr->submitDependencyUpdate(node)) {
            return false;
        }
    }
    return true;
}
template <typename GfxFamily>
bool CommandQueueHw<GfxFamily>::isSplitEnqueueBlitNeeded(TransferDirection transferDirection, size_t transferSize, CommandStreamReceiver &csr) {
auto bcsSplit = getDevice().isBcsSplitSupported() &&
@ -1438,14 +1457,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
if (isCacheFlushForBcsRequired() && gpgpuSubmission) {
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
}
for (auto &dependentCsr : csrDeps.csrWithMultiEngineDependencies) {
auto tag = allocator->getTag();
timestampPacketDependencies.multiCsrDependencies.add(tag);
bool submitStatus = dependentCsr->submitDependencyUpdate(tag);
if (!submitStatus) {
return CL_OUT_OF_RESOURCES;
}
}
obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, bcsCsr);
csrDeps.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes);
@ -1472,6 +1484,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
gpgpuCommandStream = obtainCommandStream<cmdType>(csrDeps, true, blockQueue, multiDispatchInfo, eventsRequest, blockedCommandsData, nullptr, 0, false, false);
gpgpuCommandStreamStart = gpgpuCommandStream->getUsed();
}
CsrDependencyContainer dependencyTags;
if (csrDeps.csrWithMultiEngineDependencies.size() > 0) {
bool submitStatus = prepareCsrDependency(csrDeps, dependencyTags, timestampPacketDependencies, allocator, blockQueue);
if (!submitStatus) {
return CL_OUT_OF_RESOURCES;
}
}
blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies,
eventsRequest, gpgpuCommandStream, cmdType, blockQueue, multiRootEventSyncStamp));
@ -1501,7 +1520,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
updateFromCompletionStamp(completionStamp, pEventBuilder->getEvent());
if (blockQueue) {
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, *pEventBuilder, nullptr, &bcsCsr, multiRootEventSyncStamp);
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, *pEventBuilder, nullptr, &bcsCsr, multiRootEventSyncStamp, &dependencyTags);
if (gpgpuSubmission) {
if (debugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() == 1) {

View File

@ -130,7 +130,7 @@ CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::uniq
bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr<PrintfHandler> &&printfHandler,
PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount,
TagNodeBase *multiRootDeviceSyncNode)
: Command(commandQueue, kernelOperation), surfaces(std::move(surfaces)), flushDC(flushDC), slmUsed(usesSLM),
: Command(commandQueue, kernelOperation, nullptr), surfaces(std::move(surfaces)), flushDC(flushDC), slmUsed(usesSLM),
commandType(commandType), printfHandler(std::move(printfHandler)), kernel(kernel),
kernelCount(kernelCount), preemptionMode(preemptionMode), multiRootDeviceSyncNode(multiRootDeviceSyncNode) {
UNRECOVERABLE_IF(nullptr == this->kernel);
@ -326,6 +326,7 @@ TaskCountType CommandWithoutKernel::dispatchBlitOperation() {
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies->cacheFlushNodes);
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies->previousEnqueueNodes);
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies->barrierNodes);
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies->multiCsrDependencies);
blitProperties.outputTimestampPacket = currentTimestampPacketNodes->peekNodes()[0];
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
@ -348,7 +349,13 @@ CompletionStamp &CommandWithoutKernel::submit(TaskCountType taskLevel, bool term
this->terminated = true;
return completionStamp;
}
for (auto &tagCsrPair : csrDependencies) {
bool submitStatus = tagCsrPair.first->submitDependencyUpdate(tagCsrPair.second);
if (!submitStatus) {
completionStamp.taskCount = CompletionStamp::gpuHang;
return completionStamp;
}
}
auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver();
if (!kernelOperation) {
@ -532,6 +539,10 @@ void Command::makeTimestampPacketsResident(CommandStreamReceiver &commandStreamR
Command::Command(CommandQueue &commandQueue) : commandQueue(commandQueue) {}
Command::Command(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation)
: commandQueue(commandQueue), kernelOperation(std::move(kernelOperation)) {}
// Takes over kernelOperation (moved from the caller's unique_ptr) and, when
// csrDependencies is non-null, stores a copy of the pointed-to container.
// A null csrDependencies leaves the member container empty.
Command::Command(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, CsrDependencyContainer *csrDependencies)
    : commandQueue(commandQueue), kernelOperation(std::move(kernelOperation)) {
    if (nullptr == csrDependencies) {
        return;
    }
    this->csrDependencies = *csrDependencies;
}
} // namespace NEO

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -7,6 +7,7 @@
#pragma once
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/helpers/bcs_ccs_dependency_pair_container.h"
#include "shared/source/helpers/blit_properties.h"
#include "shared/source/helpers/completion_stamp.h"
#include "shared/source/helpers/map_operation_type.h"
@ -87,7 +88,7 @@ class Command : public IFNode<Command> {
Command() = delete;
Command(CommandQueue &commandQueue);
Command(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation);
Command(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, CsrDependencyContainer *csrDependencies);
~Command() override;
virtual LinearStream *getCommandStream() {
@ -108,6 +109,7 @@ class Command : public IFNode<Command> {
std::unique_ptr<TimestampPacketDependencies> timestampPacketDependencies;
EventsRequest eventsRequest = {0, nullptr, nullptr};
std::vector<cl_event> eventsWaitlist;
CsrDependencyContainer csrDependencies;
};
class CommandMapUnmap : public Command {

View File

@ -1377,7 +1377,7 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenBlockedEventWhenWaitingForCom
EXPECT_EQ(2u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
clWaitForEvents(1, &outEvent1);
EXPECT_EQ(2u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
EXPECT_EQ(1u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
EXPECT_EQ(1u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
clReleaseEvent(outEvent1);

View File

@ -11,6 +11,7 @@
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/helpers/array_count.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/bcs_ccs_dependency_pair_container.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/timestamp_packet.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
@ -2514,6 +2515,76 @@ TEST_F(CommandQueueWithTimestampPacketTests, givenQueueWhenSettingAndQueryingLas
}
}
HWTEST_F(CommandQueueWithTimestampPacketTests, givedDependencyBetweenCsrWhenPrepareDependencyUpdateCalledThenNewTagAddedToTimestampDependencies) {
    // One extra CSR is registered as a multi-engine dependency of the queue.
    MockContext context{};
    auto queue = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, context.getDevice(0), nullptr);
    auto otherCsr = std::make_unique<MockCommandStreamReceiver>(*context.getDevice(0)->getExecutionEnvironment(), context.getDevice(0)->getRootDeviceIndex(), 1);

    TimestampPacketDependencies tsDependencies{};
    CsrDependencies csrDependencies;
    csrDependencies.csrWithMultiEngineDependencies.insert(otherCsr.get());
    CsrDependencyContainer deferredUpdates;
    TagAllocatorBase *tagAllocator = queue->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
    const bool queueIsBlocked = false;

    queue->prepareCsrDependency(csrDependencies, deferredUpdates, tsDependencies, tagAllocator, queueIsBlocked);

    // Exactly one tag must have been registered for the single dependent CSR.
    EXPECT_EQ(tsDependencies.multiCsrDependencies.peekNodes().size(), 1u);
}
HWTEST_F(CommandQueueWithTimestampPacketTests, givedNoDependencyBetweenCsrWhenPrepareDependencyUpdateCalledThenTagIsNotAddedToTimestampDependencies) {
    // No CSR is inserted into csrWithMultiEngineDependencies in this scenario.
    MockContext context{};
    auto queue = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, context.getDevice(0), nullptr);

    TimestampPacketDependencies tsDependencies{};
    CsrDependencies csrDependencies;
    CsrDependencyContainer deferredUpdates;
    TagAllocatorBase *tagAllocator = queue->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
    const bool queueIsBlocked = false;

    queue->prepareCsrDependency(csrDependencies, deferredUpdates, tsDependencies, tagAllocator, queueIsBlocked);

    // With nothing to depend on, no tag may be registered.
    EXPECT_EQ(tsDependencies.multiCsrDependencies.peekNodes().size(), 0u);
}
HWTEST_F(CommandQueueWithTimestampPacketTests, givedDependencyBetweenCsrWhenPrepareDependencyUpdateCalledForNonBlockedQueueThenSubmitDependencyUpdateCalled) {
    MockContext context{};
    auto queue = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, context.getDevice(0), nullptr);
    auto otherCsr = std::make_unique<MockCommandStreamReceiver>(*context.getDevice(0)->getExecutionEnvironment(), context.getDevice(0)->getRootDeviceIndex(), 1);

    TimestampPacketDependencies tsDependencies{};
    CsrDependencies csrDependencies;
    csrDependencies.csrWithMultiEngineDependencies.insert(otherCsr.get());
    CsrDependencyContainer deferredUpdates;
    TagAllocatorBase *tagAllocator = queue->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
    const bool queueIsBlocked = false;

    queue->prepareCsrDependency(csrDependencies, deferredUpdates, tsDependencies, tagAllocator, queueIsBlocked);

    // Non-blocked path: the update is submitted immediately and nothing is deferred.
    EXPECT_EQ(otherCsr->submitDependencyUpdateCalledTimes, 1u);
    EXPECT_EQ(deferredUpdates.size(), 0u);
}
HWTEST_F(CommandQueueWithTimestampPacketTests, givedDependencyBetweenCsrWhenPrepareDependencyUpdateCalledForBlockedQueueThenDependencyMapHasOneItem) {
    MockContext context{};
    auto queue = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, context.getDevice(0), nullptr);
    auto otherCsr = std::make_unique<MockCommandStreamReceiver>(*context.getDevice(0)->getExecutionEnvironment(), context.getDevice(0)->getRootDeviceIndex(), 1);

    TimestampPacketDependencies tsDependencies{};
    CsrDependencies csrDependencies;
    csrDependencies.csrWithMultiEngineDependencies.insert(otherCsr.get());
    CsrDependencyContainer deferredUpdates;
    TagAllocatorBase *tagAllocator = queue->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
    const bool queueIsBlocked = true;

    queue->prepareCsrDependency(csrDependencies, deferredUpdates, tsDependencies, tagAllocator, queueIsBlocked);

    // Blocked path: no submission happens here; the pair is stored for later.
    EXPECT_EQ(otherCsr->submitDependencyUpdateCalledTimes, 0u);
    EXPECT_EQ(deferredUpdates.size(), 1u);
}
HWTEST_F(CommandQueueWithTimestampPacketTests, givedDependencyBetweenCsrWhenSubmitDependencyUpdateReturnsFalseThenProcessDependencyReturnsFalse) {
    MockContext context{};
    auto queue = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, context.getDevice(0), nullptr);
    auto otherCsr = std::make_unique<MockCommandStreamReceiver>(*context.getDevice(0)->getExecutionEnvironment(), context.getDevice(0)->getRootDeviceIndex(), 1);

    TimestampPacketDependencies tsDependencies{};
    CsrDependencies csrDependencies;
    csrDependencies.csrWithMultiEngineDependencies.insert(otherCsr.get());
    CsrDependencyContainer deferredUpdates;
    TagAllocatorBase *tagAllocator = queue->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
    const bool queueIsBlocked = false;

    // Make the mocked CSR report a failed dependency submission.
    otherCsr->submitDependencyUpdateReturnValue = false;

    const bool prepared = queue->prepareCsrDependency(csrDependencies, deferredUpdates, tsDependencies, tagAllocator, queueIsBlocked);
    EXPECT_FALSE(prepared);
}
using KernelExecutionTypesTests = DispatchFlagsTests;
HWTEST_F(KernelExecutionTypesTests, givenConcurrentKernelWhileDoingNonBlockedEnqueueThenCorrectKernelTypeIsSetInCSR) {
using CsrType = MockCsrHw2<FamilyType>;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2023 Intel Corporation
* Copyright (C) 2019-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -167,7 +167,7 @@ HWTEST_F(EnqueueHandlerTest, givenNonBlitPropertyWhenEnqueueIsBlockedThenDontReg
Surface *surfaces[] = {nullptr};
mockCmdQ->enqueueBlocked(CL_COMMAND_MARKER, surfaces, size_t(0), multiDispatchInfo, timestampPacketDependencies,
blockedCommandsData, enqueuePropertiesForDependencyFlush, eventsRequest,
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), nullptr, nullptr);
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), nullptr, nullptr, nullptr);
EXPECT_FALSE(blockedCommandsDataForDependencyFlush->blitEnqueue);
}
@ -200,7 +200,7 @@ HWTEST_F(EnqueueHandlerTest, givenBlitPropertyWhenEnqueueIsBlockedThenRegisterBl
Surface *surfaces[] = {nullptr};
mockCmdQ->enqueueBlocked(CL_COMMAND_READ_BUFFER, surfaces, size_t(0), multiDispatchInfo, timestampPacketDependencies,
blockedCommandsData, enqueuePropertiesForBlitEnqueue, eventsRequest,
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), mockCmdQ->getBcsForAuxTranslation(), nullptr);
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), mockCmdQ->getBcsForAuxTranslation(), nullptr, nullptr);
EXPECT_TRUE(blockedCommandsDataForBlitEnqueue->blitEnqueue);
EXPECT_EQ(blitProperties.srcAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->srcAllocation);
EXPECT_EQ(blitProperties.dstAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->dstAllocation);

View File

@ -1,10 +1,11 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/bcs_ccs_dependency_pair_container.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/test/common/mocks/mock_csr.h"
@ -372,7 +373,7 @@ HWTEST_F(DispatchFlagsTests, givenCommandWithoutKernelWhenSubmitThenPassCorrectD
auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, AllocationType::commandBuffer, device->getDeviceBitfield()}));
auto kernelOperation = std::make_unique<KernelOperation>(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
kernelOperation->setHeaps(ih1, ih2, ih3);
std::unique_ptr<Command> command(new CommandWithoutKernel(*mockCmdQ, kernelOperation));
std::unique_ptr<Command> command(new CommandWithoutKernel(*mockCmdQ, kernelOperation, nullptr));
command->setTimestampPacketNode(*mockCmdQ->timestampPacketContainer, std::move(timestampPacketDependencies));
command->submit(20, false);
@ -393,6 +394,64 @@ HWTEST_F(DispatchFlagsTests, givenCommandWithoutKernelWhenSubmitThenPassCorrectD
EXPECT_FALSE(mockCsr->passedDispatchFlags.epilogueRequired);
}
HWTEST_F(DispatchFlagsTests, givenCsrDependencyWhenSubmitCommandWithoutKernelThenDependencyUpdateWasCalled) {
    using CsrType = MockCsr1<FamilyType>;
    setUpImpl<CsrType>();

    auto queue = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
    auto gpgpuCsr = static_cast<CsrType *>(&queue->getGpgpuCommandStreamReceiver());
    auto otherCsr = std::make_unique<MockCommandStreamReceiver>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield());

    gpgpuCsr->timestampPacketWriteEnabled = true;
    queue->timestampPacketContainer = std::make_unique<TimestampPacketContainer>();

    // Heaps and command stream required to build the kernel operation.
    IndirectHeap *dynamicStateHeap = nullptr;
    IndirectHeap *indirectObjectHeap = nullptr;
    IndirectHeap *surfaceStateHeap = nullptr;
    TimestampPacketDependencies tsDependencies;
    queue->allocateHeapMemory(IndirectHeap::Type::dynamicState, 1, dynamicStateHeap);
    queue->allocateHeapMemory(IndirectHeap::Type::indirectObject, 1, indirectObjectHeap);
    queue->allocateHeapMemory(IndirectHeap::Type::surfaceState, 1, surfaceStateHeap);

    auto stream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, AllocationType::commandBuffer, device->getDeviceBitfield()}));
    auto operation = std::make_unique<KernelOperation>(stream, *queue->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
    operation->setHeaps(dynamicStateHeap, indirectObjectHeap, surfaceStateHeap);

    // A single (CSR, tag) pair is handed over to the command.
    CsrDependencyContainer deferredUpdates;
    auto node = queue->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag();
    deferredUpdates.push_back(std::make_pair(otherCsr.get(), node));

    std::unique_ptr<Command> blockedCommand = std::make_unique<CommandWithoutKernel>(*queue, operation, &deferredUpdates);
    blockedCommand->setTimestampPacketNode(*queue->timestampPacketContainer, std::move(tsDependencies));
    blockedCommand->submit(20, false);

    EXPECT_EQ(otherCsr->submitDependencyUpdateCalledTimes, 1u);
}
HWTEST_F(DispatchFlagsTests, givenCsrDependencyWhendependencyUpdateReturnsFalseThenSubmitReturnGpuHang) {
    using CsrType = MockCsr1<FamilyType>;
    setUpImpl<CsrType>();

    auto queue = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
    auto gpgpuCsr = static_cast<CsrType *>(&queue->getGpgpuCommandStreamReceiver());
    auto otherCsr = std::make_unique<MockCommandStreamReceiver>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield());

    gpgpuCsr->timestampPacketWriteEnabled = true;
    queue->timestampPacketContainer = std::make_unique<TimestampPacketContainer>();

    // Heaps and command stream required to build the kernel operation.
    IndirectHeap *dynamicStateHeap = nullptr;
    IndirectHeap *indirectObjectHeap = nullptr;
    IndirectHeap *surfaceStateHeap = nullptr;
    TimestampPacketDependencies tsDependencies;
    queue->allocateHeapMemory(IndirectHeap::Type::dynamicState, 1, dynamicStateHeap);
    queue->allocateHeapMemory(IndirectHeap::Type::indirectObject, 1, indirectObjectHeap);
    queue->allocateHeapMemory(IndirectHeap::Type::surfaceState, 1, surfaceStateHeap);

    auto stream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, AllocationType::commandBuffer, device->getDeviceBitfield()}));
    auto operation = std::make_unique<KernelOperation>(stream, *queue->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
    operation->setHeaps(dynamicStateHeap, indirectObjectHeap, surfaceStateHeap);

    CsrDependencyContainer deferredUpdates;
    auto node = queue->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag();
    deferredUpdates.push_back(std::make_pair(otherCsr.get(), node));

    std::unique_ptr<Command> blockedCommand = std::make_unique<CommandWithoutKernel>(*queue, operation, &deferredUpdates);
    blockedCommand->setTimestampPacketNode(*queue->timestampPacketContainer, std::move(tsDependencies));

    // Force the dependent CSR to fail its dependency update.
    otherCsr->submitDependencyUpdateReturnValue = false;
    auto completion = blockedCommand->submit(20, false);

    EXPECT_EQ(completion.taskCount, CompletionStamp::gpuHang);
}
HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectDispatchHints) {
using CsrType = MockCsr1<FamilyType>;
setUpImpl<CsrType>();

View File

@ -281,6 +281,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
using BaseClass::obtainCommandStream;
using BaseClass::obtainNewTimestampPacketNodes;
using BaseClass::overrideEngine;
using BaseClass::prepareCsrDependency;
using BaseClass::processDispatchForKernels;
using BaseClass::relaxedOrderingForGpgpuAllowed;
using BaseClass::requiresCacheFlushAfterWalker;

View File

@ -20,6 +20,7 @@ set(NEO_CORE_HELPERS
${CMAKE_CURRENT_SOURCE_DIR}/array_count.h
${CMAKE_CURRENT_SOURCE_DIR}/aux_translation.h
${CMAKE_CURRENT_SOURCE_DIR}/basic_math.h
${CMAKE_CURRENT_SOURCE_DIR}/bcs_ccs_dependency_pair_container.h
${CMAKE_CURRENT_SOURCE_DIR}/bindless_heaps_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/bindless_heaps_helper.h
${CMAKE_CURRENT_SOURCE_DIR}/bit_helpers.h

View File

@ -0,0 +1,15 @@
/*
* Copyright (C) 2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <utility>
#include <vector>
namespace NEO {
class CommandStreamReceiver;
class TagNodeBase;
// Sequence of (command stream receiver, tag node) pairs. Both members are raw
// pointers to forward-declared types, so the container itself does not manage
// the lifetime of either pointee.
// <utility> is included explicitly because std::pair is declared there;
// relying on <vector> to pull it in transitively is not guaranteed.
using CsrDependencyContainer = std::vector<std::pair<CommandStreamReceiver *, TagNodeBase *>>;
} // namespace NEO

View File

@ -78,7 +78,10 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
SubmissionStatus flushTagUpdate() override { return SubmissionStatus::success; };
void updateTagFromWait() override{};
bool submitDependencyUpdate(TagNodeBase *tag) override { return true; };
// Mock override: counts every invocation and reports the result configured
// through submitDependencyUpdateReturnValue; the tag argument is not used.
bool submitDependencyUpdate(TagNodeBase *tag) override {
    ++submitDependencyUpdateCalledTimes;
    return submitDependencyUpdateReturnValue;
}
bool isUpdateTagFromWaitEnabled() override { return false; };
void writeMemoryAub(aub_stream::AllocationParams &allocationParams) override {
@ -243,6 +246,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
uint32_t writeMemoryAubCalled = 0;
uint32_t makeResidentCalledTimes = 0;
uint32_t downloadAllocationsCalledCount = 0;
uint32_t submitDependencyUpdateCalledTimes = 0;
int hostPtrSurfaceCreationMutexLockCount = 0;
bool multiOsContextCapable = false;
bool memoryCompressionEnabled = false;
@ -259,6 +263,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
BatchBuffer latestFlushedBatchBuffer = {};
QueueThrottle getLastDirectSubmissionThrottleReturnValue = QueueThrottle::MEDIUM;
bool getAcLineConnectedReturnValue = true;
bool submitDependencyUpdateReturnValue = true;
};
class MockCommandStreamReceiverWithFailingSubmitBatch : public MockCommandStreamReceiver {