fix: cache flush dependency for queue blocked
Related-to: NEO-9872, HSD-18038461954 Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
parent
e01d34741d
commit
90df4b298b
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -9,6 +9,7 @@
|
|||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/helpers/bcs_ccs_dependency_pair_container.h"
|
||||
#include "shared/source/helpers/engine_control.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
|
@ -401,7 +402,8 @@ class CommandQueueHw : public CommandQueue {
|
|||
EventBuilder &externalEventBuilder,
|
||||
std::unique_ptr<PrintfHandler> &&printfHandler,
|
||||
CommandStreamReceiver *bcsCsr,
|
||||
TagNodeBase *multiRootDeviceSyncNode);
|
||||
TagNodeBase *multiRootDeviceSyncNode,
|
||||
CsrDependencyContainer *csrDependencies);
|
||||
|
||||
CompletionStamp enqueueCommandWithoutKernel(Surface **surfaces,
|
||||
size_t surfaceCount,
|
||||
|
@ -449,6 +451,7 @@ class CommandQueueHw : public CommandQueue {
|
|||
|
||||
protected:
|
||||
MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo){};
|
||||
// Obtains one tag from `allocator` for every CSR in csrDeps.csrWithMultiEngineDependencies and adds it to
// timestampPacketDependencies.multiCsrDependencies. When blockQueue is false the tag is submitted to the
// dependent CSR right away; when it is true the (CSR, tag) pair is appended to dependencyTags instead.
// Returns false as soon as a dependency update submission fails, true otherwise.
MOCKABLE_VIRTUAL bool prepareCsrDependency(CsrDependencies &csrDeps, CsrDependencyContainer &dependencyTags, TimestampPacketDependencies &timestampPacketDependencies, TagAllocatorBase *allocator, bool blockQueue);
|
||||
size_t calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image);
|
||||
|
||||
cl_int enqueueReadWriteBufferOnCpuWithMemoryTransfer(cl_command_type commandType, Buffer *buffer,
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/wait_status.h"
|
||||
#include "shared/source/direct_submission/relaxed_ordering_helper.h"
|
||||
#include "shared/source/helpers/bcs_ccs_dependency_pair_container.h"
|
||||
#include "shared/source/helpers/engine_node_helper.h"
|
||||
#include "shared/source/helpers/flat_batch_buffer_helper.h"
|
||||
#include "shared/source/helpers/flush_stamp.h"
|
||||
|
@ -457,7 +458,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||
eventBuilder,
|
||||
std::move(printfHandler),
|
||||
nullptr,
|
||||
multiRootEventSyncStamp);
|
||||
multiRootEventSyncStamp,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
if (deferredTimestampPackets.get()) {
|
||||
|
@ -994,7 +996,8 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
|||
EventBuilder &externalEventBuilder,
|
||||
std::unique_ptr<PrintfHandler> &&printfHandler,
|
||||
CommandStreamReceiver *bcsCsr,
|
||||
TagNodeBase *multiRootDeviceSyncNode) {
|
||||
TagNodeBase *multiRootDeviceSyncNode,
|
||||
CsrDependencyContainer *dependencyTags) {
|
||||
|
||||
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
||||
|
||||
|
@ -1033,9 +1036,8 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
|||
|
||||
storeTimestampPackets = (timestampPacketContainer != nullptr);
|
||||
}
|
||||
|
||||
if (enqueueProperties.operation != EnqueueProperties::Operation::gpuKernel) {
|
||||
command = std::make_unique<CommandWithoutKernel>(*this, blockedCommandsData);
|
||||
command = std::make_unique<CommandWithoutKernel>(*this, blockedCommandsData, dependencyTags);
|
||||
} else {
|
||||
// store task data in event
|
||||
std::vector<Surface *> allSurfaces;
|
||||
|
@ -1244,6 +1246,23 @@ size_t CommandQueueHw<GfxFamily>::calculateHostPtrSizeForImage(const size_t *reg
|
|||
return Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, image->getImageDesc().image_type);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool CommandQueueHw<GfxFamily>::prepareCsrDependency(CsrDependencies &csrDeps, CsrDependencyContainer &dependencyTags, TimestampPacketDependencies ×tampPacketDependencies, TagAllocatorBase *allocator, bool blockQueue) {
|
||||
for (auto &dependentCsr : csrDeps.csrWithMultiEngineDependencies) {
|
||||
auto tag = allocator->getTag();
|
||||
timestampPacketDependencies.multiCsrDependencies.add(tag);
|
||||
if (!blockQueue) {
|
||||
bool submitStatus = dependentCsr->submitDependencyUpdate(tag);
|
||||
if (!submitStatus) {
|
||||
return submitStatus;
|
||||
}
|
||||
} else {
|
||||
dependencyTags.push_back(std::make_pair(dependentCsr, tag));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool CommandQueueHw<GfxFamily>::isSplitEnqueueBlitNeeded(TransferDirection transferDirection, size_t transferSize, CommandStreamReceiver &csr) {
|
||||
auto bcsSplit = getDevice().isBcsSplitSupported() &&
|
||||
|
@ -1438,14 +1457,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
|||
if (isCacheFlushForBcsRequired() && gpgpuSubmission) {
|
||||
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
|
||||
}
|
||||
for (auto &dependentCsr : csrDeps.csrWithMultiEngineDependencies) {
|
||||
auto tag = allocator->getTag();
|
||||
timestampPacketDependencies.multiCsrDependencies.add(tag);
|
||||
bool submitStatus = dependentCsr->submitDependencyUpdate(tag);
|
||||
if (!submitStatus) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
}
|
||||
|
||||
obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, bcsCsr);
|
||||
csrDeps.timestampPacketContainer.push_back(×tampPacketDependencies.previousEnqueueNodes);
|
||||
|
||||
|
@ -1472,6 +1484,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
|||
gpgpuCommandStream = obtainCommandStream<cmdType>(csrDeps, true, blockQueue, multiDispatchInfo, eventsRequest, blockedCommandsData, nullptr, 0, false, false);
|
||||
gpgpuCommandStreamStart = gpgpuCommandStream->getUsed();
|
||||
}
|
||||
CsrDependencyContainer dependencyTags;
|
||||
if (csrDeps.csrWithMultiEngineDependencies.size() > 0) {
|
||||
bool submitStatus = prepareCsrDependency(csrDeps, dependencyTags, timestampPacketDependencies, allocator, blockQueue);
|
||||
if (!submitStatus) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
}
|
||||
|
||||
blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies,
|
||||
eventsRequest, gpgpuCommandStream, cmdType, blockQueue, multiRootEventSyncStamp));
|
||||
|
@ -1501,7 +1520,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
|||
updateFromCompletionStamp(completionStamp, pEventBuilder->getEvent());
|
||||
|
||||
if (blockQueue) {
|
||||
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, *pEventBuilder, nullptr, &bcsCsr, multiRootEventSyncStamp);
|
||||
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, *pEventBuilder, nullptr, &bcsCsr, multiRootEventSyncStamp, &dependencyTags);
|
||||
|
||||
if (gpgpuSubmission) {
|
||||
if (debugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() == 1) {
|
||||
|
|
|
@ -130,7 +130,7 @@ CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::uniq
|
|||
bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr<PrintfHandler> &&printfHandler,
|
||||
PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount,
|
||||
TagNodeBase *multiRootDeviceSyncNode)
|
||||
: Command(commandQueue, kernelOperation), surfaces(std::move(surfaces)), flushDC(flushDC), slmUsed(usesSLM),
|
||||
: Command(commandQueue, kernelOperation, nullptr), surfaces(std::move(surfaces)), flushDC(flushDC), slmUsed(usesSLM),
|
||||
commandType(commandType), printfHandler(std::move(printfHandler)), kernel(kernel),
|
||||
kernelCount(kernelCount), preemptionMode(preemptionMode), multiRootDeviceSyncNode(multiRootDeviceSyncNode) {
|
||||
UNRECOVERABLE_IF(nullptr == this->kernel);
|
||||
|
@ -326,6 +326,7 @@ TaskCountType CommandWithoutKernel::dispatchBlitOperation() {
|
|||
blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies->cacheFlushNodes);
|
||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies->previousEnqueueNodes);
|
||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies->barrierNodes);
|
||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(×tampPacketDependencies->multiCsrDependencies);
|
||||
blitProperties.outputTimestampPacket = currentTimestampPacketNodes->peekNodes()[0];
|
||||
|
||||
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
||||
|
@ -348,7 +349,13 @@ CompletionStamp &CommandWithoutKernel::submit(TaskCountType taskLevel, bool term
|
|||
this->terminated = true;
|
||||
return completionStamp;
|
||||
}
|
||||
|
||||
for (auto &tagCsrPair : csrDependencies) {
|
||||
bool submitStatus = tagCsrPair.first->submitDependencyUpdate(tagCsrPair.second);
|
||||
if (!submitStatus) {
|
||||
completionStamp.taskCount = CompletionStamp::gpuHang;
|
||||
return completionStamp;
|
||||
}
|
||||
}
|
||||
auto &commandStreamReceiver = commandQueue.getGpgpuCommandStreamReceiver();
|
||||
|
||||
if (!kernelOperation) {
|
||||
|
@ -532,6 +539,10 @@ void Command::makeTimestampPacketsResident(CommandStreamReceiver &commandStreamR
|
|||
|
||||
// Constructs a command bound to commandQueue; no kernel operation is passed in.
Command::Command(CommandQueue &commandQueue) : commandQueue(commandQueue) {}
|
||||
|
||||
// Constructs a command bound to commandQueue and takes over the kernel operation:
// the caller's unique_ptr is moved from, even though it is passed by lvalue reference.
Command::Command(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation)
|
||||
: commandQueue(commandQueue), kernelOperation(std::move(kernelOperation)) {}
|
||||
// Constructs a command bound to commandQueue, moving the kernel operation out of the caller's unique_ptr.
// csrDependencies is optional: a null pointer leaves the command's own dependency list empty,
// otherwise the pointed-to container is copied into the command.
Command::Command(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, CsrDependencyContainer *csrDependencies)
    : commandQueue(commandQueue), kernelOperation(std::move(kernelOperation)) {
    if (csrDependencies == nullptr) {
        return;
    }
    this->csrDependencies = *csrDependencies;
}
|
||||
} // namespace NEO
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -7,6 +7,7 @@
|
|||
|
||||
#pragma once
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/helpers/bcs_ccs_dependency_pair_container.h"
|
||||
#include "shared/source/helpers/blit_properties.h"
|
||||
#include "shared/source/helpers/completion_stamp.h"
|
||||
#include "shared/source/helpers/map_operation_type.h"
|
||||
|
@ -87,7 +88,7 @@ class Command : public IFNode<Command> {
|
|||
|
||||
Command() = delete;
|
||||
Command(CommandQueue &commandQueue);
|
||||
Command(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation);
|
||||
Command(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, CsrDependencyContainer *csrDependencies);
|
||||
|
||||
~Command() override;
|
||||
virtual LinearStream *getCommandStream() {
|
||||
|
@ -108,6 +109,7 @@ class Command : public IFNode<Command> {
|
|||
std::unique_ptr<TimestampPacketDependencies> timestampPacketDependencies;
|
||||
EventsRequest eventsRequest = {0, nullptr, nullptr};
|
||||
std::vector<cl_event> eventsWaitlist;
|
||||
CsrDependencyContainer csrDependencies;
|
||||
};
|
||||
|
||||
class CommandMapUnmap : public Command {
|
||||
|
|
|
@ -1377,7 +1377,7 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenBlockedEventWhenWaitingForCom
|
|||
EXPECT_EQ(2u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
|
||||
|
||||
clWaitForEvents(1, &outEvent1);
|
||||
EXPECT_EQ(2u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
|
||||
EXPECT_EQ(1u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
|
||||
EXPECT_EQ(1u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
|
||||
|
||||
clReleaseEvent(outEvent1);
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "shared/source/gmm_helper/gmm.h"
|
||||
#include "shared/source/helpers/array_count.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/helpers/bcs_ccs_dependency_pair_container.h"
|
||||
#include "shared/source/helpers/engine_node_helper.h"
|
||||
#include "shared/source/helpers/timestamp_packet.h"
|
||||
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
||||
|
@ -2514,6 +2515,76 @@ TEST_F(CommandQueueWithTimestampPacketTests, givenQueueWhenSettingAndQueryingLas
|
|||
}
|
||||
}
|
||||
|
||||
// One dependent CSR registered -> exactly one tag must land in multiCsrDependencies.
HWTEST_F(CommandQueueWithTimestampPacketTests, givedDependencyBetweenCsrWhenPrepareDependencyUpdateCalledThenNewTagAddedToTimestampDependencies) {
    MockContext context{};
    auto *clDevice = context.getDevice(0);
    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, clDevice, nullptr);
    auto dependentCsr = std::make_unique<MockCommandStreamReceiver>(*clDevice->getExecutionEnvironment(), clDevice->getRootDeviceIndex(), 1);

    TimestampPacketDependencies timestampDeps{};
    CsrDependencies csrDeps;
    csrDeps.csrWithMultiEngineDependencies.insert(dependentCsr.get());
    CsrDependencyContainer deferredDependencies;
    TagAllocatorBase *tagAllocator = mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
    const bool blockQueue = false;

    mockCmdQ->prepareCsrDependency(csrDeps, deferredDependencies, timestampDeps, tagAllocator, blockQueue);

    EXPECT_EQ(timestampDeps.multiCsrDependencies.peekNodes().size(), 1u);
}
|
||||
|
||||
// No dependent CSR registered -> multiCsrDependencies must stay empty.
HWTEST_F(CommandQueueWithTimestampPacketTests, givedNoDependencyBetweenCsrWhenPrepareDependencyUpdateCalledThenTagIsNotAddedToTimestampDependencies) {
    MockContext context{};
    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, context.getDevice(0), nullptr);

    TimestampPacketDependencies timestampDeps{};
    CsrDependencies csrDeps;
    CsrDependencyContainer deferredDependencies;
    TagAllocatorBase *tagAllocator = mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
    const bool blockQueue = false;

    mockCmdQ->prepareCsrDependency(csrDeps, deferredDependencies, timestampDeps, tagAllocator, blockQueue);

    EXPECT_EQ(timestampDeps.multiCsrDependencies.peekNodes().size(), 0u);
}
|
||||
|
||||
// Non-blocked queue: the dependency update is submitted immediately and nothing is deferred.
HWTEST_F(CommandQueueWithTimestampPacketTests, givedDependencyBetweenCsrWhenPrepareDependencyUpdateCalledForNonBlockedQueueThenSubmitDependencyUpdateCalled) {
    MockContext context{};
    auto *clDevice = context.getDevice(0);
    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, clDevice, nullptr);
    auto dependentCsr = std::make_unique<MockCommandStreamReceiver>(*clDevice->getExecutionEnvironment(), clDevice->getRootDeviceIndex(), 1);

    TimestampPacketDependencies timestampDeps{};
    CsrDependencies csrDeps;
    csrDeps.csrWithMultiEngineDependencies.insert(dependentCsr.get());
    CsrDependencyContainer deferredDependencies;
    TagAllocatorBase *tagAllocator = mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
    const bool blockQueue = false;

    mockCmdQ->prepareCsrDependency(csrDeps, deferredDependencies, timestampDeps, tagAllocator, blockQueue);

    EXPECT_EQ(dependentCsr->submitDependencyUpdateCalledTimes, 1u);
    EXPECT_EQ(deferredDependencies.size(), 0u);
}
|
||||
|
||||
// Blocked queue: nothing is submitted yet, the (CSR, tag) pair is stored for later instead.
HWTEST_F(CommandQueueWithTimestampPacketTests, givedDependencyBetweenCsrWhenPrepareDependencyUpdateCalledForBlockedQueueThenDependencyMapHasOneItem) {
    MockContext context{};
    auto *clDevice = context.getDevice(0);
    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, clDevice, nullptr);
    auto dependentCsr = std::make_unique<MockCommandStreamReceiver>(*clDevice->getExecutionEnvironment(), clDevice->getRootDeviceIndex(), 1);

    TimestampPacketDependencies timestampDeps{};
    CsrDependencies csrDeps;
    csrDeps.csrWithMultiEngineDependencies.insert(dependentCsr.get());
    CsrDependencyContainer deferredDependencies;
    TagAllocatorBase *tagAllocator = mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
    const bool blockQueue = true;

    mockCmdQ->prepareCsrDependency(csrDeps, deferredDependencies, timestampDeps, tagAllocator, blockQueue);

    EXPECT_EQ(dependentCsr->submitDependencyUpdateCalledTimes, 0u);
    EXPECT_EQ(deferredDependencies.size(), 1u);
}
|
||||
|
||||
// A failing submitDependencyUpdate() on the dependent CSR must be propagated as a false return value.
HWTEST_F(CommandQueueWithTimestampPacketTests, givedDependencyBetweenCsrWhenSubmitDependencyUpdateReturnsFalseThenProcessDependencyReturnsFalse) {
    MockContext context{};
    auto *clDevice = context.getDevice(0);
    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, clDevice, nullptr);
    auto dependentCsr = std::make_unique<MockCommandStreamReceiver>(*clDevice->getExecutionEnvironment(), clDevice->getRootDeviceIndex(), 1);
    dependentCsr->submitDependencyUpdateReturnValue = false;

    TimestampPacketDependencies timestampDeps{};
    CsrDependencies csrDeps;
    csrDeps.csrWithMultiEngineDependencies.insert(dependentCsr.get());
    CsrDependencyContainer deferredDependencies;
    TagAllocatorBase *tagAllocator = mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
    const bool blockQueue = false;

    const bool prepared = mockCmdQ->prepareCsrDependency(csrDeps, deferredDependencies, timestampDeps, tagAllocator, blockQueue);

    EXPECT_FALSE(prepared);
}
|
||||
|
||||
using KernelExecutionTypesTests = DispatchFlagsTests;
|
||||
HWTEST_F(KernelExecutionTypesTests, givenConcurrentKernelWhileDoingNonBlockedEnqueueThenCorrectKernelTypeIsSetInCSR) {
|
||||
using CsrType = MockCsrHw2<FamilyType>;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -167,7 +167,7 @@ HWTEST_F(EnqueueHandlerTest, givenNonBlitPropertyWhenEnqueueIsBlockedThenDontReg
|
|||
Surface *surfaces[] = {nullptr};
|
||||
mockCmdQ->enqueueBlocked(CL_COMMAND_MARKER, surfaces, size_t(0), multiDispatchInfo, timestampPacketDependencies,
|
||||
blockedCommandsData, enqueuePropertiesForDependencyFlush, eventsRequest,
|
||||
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), nullptr, nullptr);
|
||||
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), nullptr, nullptr, nullptr);
|
||||
EXPECT_FALSE(blockedCommandsDataForDependencyFlush->blitEnqueue);
|
||||
}
|
||||
|
||||
|
@ -200,7 +200,7 @@ HWTEST_F(EnqueueHandlerTest, givenBlitPropertyWhenEnqueueIsBlockedThenRegisterBl
|
|||
Surface *surfaces[] = {nullptr};
|
||||
mockCmdQ->enqueueBlocked(CL_COMMAND_READ_BUFFER, surfaces, size_t(0), multiDispatchInfo, timestampPacketDependencies,
|
||||
blockedCommandsData, enqueuePropertiesForBlitEnqueue, eventsRequest,
|
||||
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), mockCmdQ->getBcsForAuxTranslation(), nullptr);
|
||||
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), mockCmdQ->getBcsForAuxTranslation(), nullptr, nullptr);
|
||||
EXPECT_TRUE(blockedCommandsDataForBlitEnqueue->blitEnqueue);
|
||||
EXPECT_EQ(blitProperties.srcAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->srcAllocation);
|
||||
EXPECT_EQ(blitProperties.dstAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->dstAllocation);
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/bcs_ccs_dependency_pair_container.h"
|
||||
#include "shared/source/memory_manager/allocation_properties.h"
|
||||
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
||||
#include "shared/test/common/mocks/mock_csr.h"
|
||||
|
@ -372,7 +373,7 @@ HWTEST_F(DispatchFlagsTests, givenCommandWithoutKernelWhenSubmitThenPassCorrectD
|
|||
auto cmdStream = new LinearStream(device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, AllocationType::commandBuffer, device->getDeviceBitfield()}));
|
||||
auto kernelOperation = std::make_unique<KernelOperation>(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
|
||||
kernelOperation->setHeaps(ih1, ih2, ih3);
|
||||
std::unique_ptr<Command> command(new CommandWithoutKernel(*mockCmdQ, kernelOperation));
|
||||
std::unique_ptr<Command> command(new CommandWithoutKernel(*mockCmdQ, kernelOperation, nullptr));
|
||||
command->setTimestampPacketNode(*mockCmdQ->timestampPacketContainer, std::move(timestampPacketDependencies));
|
||||
|
||||
command->submit(20, false);
|
||||
|
@ -393,6 +394,64 @@ HWTEST_F(DispatchFlagsTests, givenCommandWithoutKernelWhenSubmitThenPassCorrectD
|
|||
EXPECT_FALSE(mockCsr->passedDispatchFlags.epilogueRequired);
|
||||
}
|
||||
|
||||
// A CommandWithoutKernel created with one (CSR, tag) dependency must call submitDependencyUpdate()
// on that CSR exactly once when the command is submitted.
HWTEST_F(DispatchFlagsTests, givenCsrDependencyWhenSubmitCommandWithoutKernelThenDependencyUpdateWasCalled) {
    using CsrType = MockCsr1<FamilyType>;
    setUpImpl<CsrType>();

    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
    auto &gpgpuCsr = mockCmdQ->getGpgpuCommandStreamReceiver();
    auto mockCsr = static_cast<CsrType *>(&gpgpuCsr);
    auto dependentCsr = std::make_unique<MockCommandStreamReceiver>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield());

    mockCsr->timestampPacketWriteEnabled = true;
    mockCmdQ->timestampPacketContainer = std::make_unique<TimestampPacketContainer>();

    IndirectHeap *dynamicStateHeap = nullptr;
    IndirectHeap *indirectObjectHeap = nullptr;
    IndirectHeap *surfaceStateHeap = nullptr;
    TimestampPacketDependencies timestampPacketDependencies;
    mockCmdQ->allocateHeapMemory(IndirectHeap::Type::dynamicState, 1, dynamicStateHeap);
    mockCmdQ->allocateHeapMemory(IndirectHeap::Type::indirectObject, 1, indirectObjectHeap);
    mockCmdQ->allocateHeapMemory(IndirectHeap::Type::surfaceState, 1, surfaceStateHeap);

    // The raw LinearStream pointer is handed to the KernelOperation constructed right below.
    auto *cmdBufferAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, AllocationType::commandBuffer, device->getDeviceBitfield()});
    auto cmdStream = new LinearStream(cmdBufferAllocation);
    auto kernelOperation = std::make_unique<KernelOperation>(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
    kernelOperation->setHeaps(dynamicStateHeap, indirectObjectHeap, surfaceStateHeap);

    CsrDependencyContainer pendingDependencies;
    auto dependencyTag = mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag();
    pendingDependencies.push_back(std::make_pair(dependentCsr.get(), dependencyTag));

    std::unique_ptr<Command> command = std::make_unique<CommandWithoutKernel>(*mockCmdQ, kernelOperation, &pendingDependencies);
    command->setTimestampPacketNode(*mockCmdQ->timestampPacketContainer, std::move(timestampPacketDependencies));

    command->submit(20, false);

    EXPECT_EQ(dependentCsr->submitDependencyUpdateCalledTimes, 1u);
}
|
||||
|
||||
// When the dependent CSR fails to submit the dependency update, submit() must report
// CompletionStamp::gpuHang as the task count.
HWTEST_F(DispatchFlagsTests, givenCsrDependencyWhendependencyUpdateReturnsFalseThenSubmitReturnGpuHang) {
    using CsrType = MockCsr1<FamilyType>;
    setUpImpl<CsrType>();

    auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
    auto &gpgpuCsr = mockCmdQ->getGpgpuCommandStreamReceiver();
    auto mockCsr = static_cast<CsrType *>(&gpgpuCsr);
    auto dependentCsr = std::make_unique<MockCommandStreamReceiver>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield());

    mockCsr->timestampPacketWriteEnabled = true;
    mockCmdQ->timestampPacketContainer = std::make_unique<TimestampPacketContainer>();

    IndirectHeap *dynamicStateHeap = nullptr;
    IndirectHeap *indirectObjectHeap = nullptr;
    IndirectHeap *surfaceStateHeap = nullptr;
    TimestampPacketDependencies timestampPacketDependencies;
    mockCmdQ->allocateHeapMemory(IndirectHeap::Type::dynamicState, 1, dynamicStateHeap);
    mockCmdQ->allocateHeapMemory(IndirectHeap::Type::indirectObject, 1, indirectObjectHeap);
    mockCmdQ->allocateHeapMemory(IndirectHeap::Type::surfaceState, 1, surfaceStateHeap);

    // The raw LinearStream pointer is handed to the KernelOperation constructed right below.
    auto *cmdBufferAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 1, AllocationType::commandBuffer, device->getDeviceBitfield()});
    auto cmdStream = new LinearStream(cmdBufferAllocation);
    auto kernelOperation = std::make_unique<KernelOperation>(cmdStream, *mockCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
    kernelOperation->setHeaps(dynamicStateHeap, indirectObjectHeap, surfaceStateHeap);

    CsrDependencyContainer pendingDependencies;
    auto dependencyTag = mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag();
    pendingDependencies.push_back(std::make_pair(dependentCsr.get(), dependencyTag));

    std::unique_ptr<Command> command = std::make_unique<CommandWithoutKernel>(*mockCmdQ, kernelOperation, &pendingDependencies);
    command->setTimestampPacketNode(*mockCmdQ->timestampPacketContainer, std::move(timestampPacketDependencies));

    // Force the dependency update to fail before submitting.
    dependentCsr->submitDependencyUpdateReturnValue = false;
    auto completionStamp = command->submit(20, false);

    EXPECT_EQ(completionStamp.taskCount, CompletionStamp::gpuHang);
}
|
||||
|
||||
HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectDispatchHints) {
|
||||
using CsrType = MockCsr1<FamilyType>;
|
||||
setUpImpl<CsrType>();
|
||||
|
|
|
@ -281,6 +281,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
|||
using BaseClass::obtainCommandStream;
|
||||
using BaseClass::obtainNewTimestampPacketNodes;
|
||||
using BaseClass::overrideEngine;
|
||||
using BaseClass::prepareCsrDependency;
|
||||
using BaseClass::processDispatchForKernels;
|
||||
using BaseClass::relaxedOrderingForGpgpuAllowed;
|
||||
using BaseClass::requiresCacheFlushAfterWalker;
|
||||
|
|
|
@ -20,6 +20,7 @@ set(NEO_CORE_HELPERS
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/array_count.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aux_translation.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/basic_math.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bcs_ccs_dependency_pair_container.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bindless_heaps_helper.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bindless_heaps_helper.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bit_helpers.h
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
/*
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <utility>
#include <vector>

namespace NEO {
class CommandStreamReceiver;
class TagNodeBase;

// List of (command stream receiver, tag node) pairs. Each entry names the CSR on which
// submitDependencyUpdate() is to be called and the tag to pass to it. Both pointers are non-owning.
using CsrDependencyContainer = std::vector<std::pair<CommandStreamReceiver *, TagNodeBase *>>;
} // namespace NEO
|
|
@ -78,7 +78,10 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
|||
|
||||
SubmissionStatus flushTagUpdate() override { return SubmissionStatus::success; };
|
||||
void updateTagFromWait() override{};
|
||||
bool submitDependencyUpdate(TagNodeBase *tag) override { return true; };
|
||||
// Mock override: counts every call in submitDependencyUpdateCalledTimes and returns the
// test-configurable submitDependencyUpdateReturnValue; the tag itself is ignored.
bool submitDependencyUpdate(TagNodeBase *tag) override {
    ++submitDependencyUpdateCalledTimes;
    return submitDependencyUpdateReturnValue;
}
|
||||
bool isUpdateTagFromWaitEnabled() override { return false; };
|
||||
|
||||
void writeMemoryAub(aub_stream::AllocationParams &allocationParams) override {
|
||||
|
@ -243,6 +246,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
|||
uint32_t writeMemoryAubCalled = 0;
|
||||
uint32_t makeResidentCalledTimes = 0;
|
||||
uint32_t downloadAllocationsCalledCount = 0;
|
||||
uint32_t submitDependencyUpdateCalledTimes = 0;
|
||||
int hostPtrSurfaceCreationMutexLockCount = 0;
|
||||
bool multiOsContextCapable = false;
|
||||
bool memoryCompressionEnabled = false;
|
||||
|
@ -259,6 +263,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
|||
BatchBuffer latestFlushedBatchBuffer = {};
|
||||
QueueThrottle getLastDirectSubmissionThrottleReturnValue = QueueThrottle::MEDIUM;
|
||||
bool getAcLineConnectedReturnValue = true;
|
||||
bool submitDependencyUpdateReturnValue = true;
|
||||
};
|
||||
|
||||
class MockCommandStreamReceiverWithFailingSubmitBatch : public MockCommandStreamReceiver {
|
||||
|
|
Loading…
Reference in New Issue