mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 21:18:24 +08:00
fix: cache flush dependency for queue blocked
Related-to: NEO-9872, HSD-18038461954 Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
e01d34741d
commit
90df4b298b
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/helpers/bcs_ccs_dependency_pair_container.h"
|
||||
#include "shared/source/helpers/engine_control.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
@@ -401,7 +402,8 @@ class CommandQueueHw : public CommandQueue {
|
||||
EventBuilder &externalEventBuilder,
|
||||
std::unique_ptr<PrintfHandler> &&printfHandler,
|
||||
CommandStreamReceiver *bcsCsr,
|
||||
TagNodeBase *multiRootDeviceSyncNode);
|
||||
TagNodeBase *multiRootDeviceSyncNode,
|
||||
CsrDependencyContainer *csrDependencies);
|
||||
|
||||
CompletionStamp enqueueCommandWithoutKernel(Surface **surfaces,
|
||||
size_t surfaceCount,
|
||||
@@ -449,6 +451,7 @@ class CommandQueueHw : public CommandQueue {
|
||||
|
||||
protected:
|
||||
MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo){};
|
||||
MOCKABLE_VIRTUAL bool prepareCsrDependency(CsrDependencies &csrDeps, CsrDependencyContainer &dependencyTags, TimestampPacketDependencies ×tampPacketDependencies, TagAllocatorBase *allocator, bool blockQueue);
|
||||
size_t calculateHostPtrSizeForImage(const size_t *region, size_t rowPitch, size_t slicePitch, Image *image);
|
||||
|
||||
cl_int enqueueReadWriteBufferOnCpuWithMemoryTransfer(cl_command_type commandType, Buffer *buffer,
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/wait_status.h"
|
||||
#include "shared/source/direct_submission/relaxed_ordering_helper.h"
|
||||
#include "shared/source/helpers/bcs_ccs_dependency_pair_container.h"
|
||||
#include "shared/source/helpers/engine_node_helper.h"
|
||||
#include "shared/source/helpers/flat_batch_buffer_helper.h"
|
||||
#include "shared/source/helpers/flush_stamp.h"
|
||||
@@ -457,7 +458,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
eventBuilder,
|
||||
std::move(printfHandler),
|
||||
nullptr,
|
||||
multiRootEventSyncStamp);
|
||||
multiRootEventSyncStamp,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
if (deferredTimestampPackets.get()) {
|
||||
@@ -994,7 +996,8 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
||||
EventBuilder &externalEventBuilder,
|
||||
std::unique_ptr<PrintfHandler> &&printfHandler,
|
||||
CommandStreamReceiver *bcsCsr,
|
||||
TagNodeBase *multiRootDeviceSyncNode) {
|
||||
TagNodeBase *multiRootDeviceSyncNode,
|
||||
CsrDependencyContainer *dependencyTags) {
|
||||
|
||||
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
|
||||
|
||||
@@ -1033,9 +1036,8 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
|
||||
|
||||
storeTimestampPackets = (timestampPacketContainer != nullptr);
|
||||
}
|
||||
|
||||
if (enqueueProperties.operation != EnqueueProperties::Operation::gpuKernel) {
|
||||
command = std::make_unique<CommandWithoutKernel>(*this, blockedCommandsData);
|
||||
command = std::make_unique<CommandWithoutKernel>(*this, blockedCommandsData, dependencyTags);
|
||||
} else {
|
||||
// store task data in event
|
||||
std::vector<Surface *> allSurfaces;
|
||||
@@ -1244,6 +1246,23 @@ size_t CommandQueueHw<GfxFamily>::calculateHostPtrSizeForImage(const size_t *reg
|
||||
return Image::calculateHostPtrSize(region, dstRowPitch, dstSlicePitch, bytesPerPixel, image->getImageDesc().image_type);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool CommandQueueHw<GfxFamily>::prepareCsrDependency(CsrDependencies &csrDeps, CsrDependencyContainer &dependencyTags, TimestampPacketDependencies ×tampPacketDependencies, TagAllocatorBase *allocator, bool blockQueue) {
|
||||
for (auto &dependentCsr : csrDeps.csrWithMultiEngineDependencies) {
|
||||
auto tag = allocator->getTag();
|
||||
timestampPacketDependencies.multiCsrDependencies.add(tag);
|
||||
if (!blockQueue) {
|
||||
bool submitStatus = dependentCsr->submitDependencyUpdate(tag);
|
||||
if (!submitStatus) {
|
||||
return submitStatus;
|
||||
}
|
||||
} else {
|
||||
dependencyTags.push_back(std::make_pair(dependentCsr, tag));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool CommandQueueHw<GfxFamily>::isSplitEnqueueBlitNeeded(TransferDirection transferDirection, size_t transferSize, CommandStreamReceiver &csr) {
|
||||
auto bcsSplit = getDevice().isBcsSplitSupported() &&
|
||||
@@ -1438,14 +1457,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
||||
if (isCacheFlushForBcsRequired() && gpgpuSubmission) {
|
||||
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
|
||||
}
|
||||
for (auto &dependentCsr : csrDeps.csrWithMultiEngineDependencies) {
|
||||
auto tag = allocator->getTag();
|
||||
timestampPacketDependencies.multiCsrDependencies.add(tag);
|
||||
bool submitStatus = dependentCsr->submitDependencyUpdate(tag);
|
||||
if (!submitStatus) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
}
|
||||
|
||||
obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, bcsCsr);
|
||||
csrDeps.timestampPacketContainer.push_back(×tampPacketDependencies.previousEnqueueNodes);
|
||||
|
||||
@@ -1472,6 +1484,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
||||
gpgpuCommandStream = obtainCommandStream<cmdType>(csrDeps, true, blockQueue, multiDispatchInfo, eventsRequest, blockedCommandsData, nullptr, 0, false, false);
|
||||
gpgpuCommandStreamStart = gpgpuCommandStream->getUsed();
|
||||
}
|
||||
CsrDependencyContainer dependencyTags;
|
||||
if (csrDeps.csrWithMultiEngineDependencies.size() > 0) {
|
||||
bool submitStatus = prepareCsrDependency(csrDeps, dependencyTags, timestampPacketDependencies, allocator, blockQueue);
|
||||
if (!submitStatus) {
|
||||
return CL_OUT_OF_RESOURCES;
|
||||
}
|
||||
}
|
||||
|
||||
blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies,
|
||||
eventsRequest, gpgpuCommandStream, cmdType, blockQueue, multiRootEventSyncStamp));
|
||||
@@ -1501,7 +1520,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
||||
updateFromCompletionStamp(completionStamp, pEventBuilder->getEvent());
|
||||
|
||||
if (blockQueue) {
|
||||
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, *pEventBuilder, nullptr, &bcsCsr, multiRootEventSyncStamp);
|
||||
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, *pEventBuilder, nullptr, &bcsCsr, multiRootEventSyncStamp, &dependencyTags);
|
||||
|
||||
if (gpgpuSubmission) {
|
||||
if (debugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() == 1) {
|
||||
|
||||
Reference in New Issue
Block a user