feature(ocl) use tags to synchronize multi root device events

Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka 2023-01-12 09:59:50 +00:00 committed by Compute-Runtime-Automation
parent fecb52ac49
commit 16bc84e27d
59 changed files with 644 additions and 193 deletions

View File

@ -391,7 +391,8 @@ class CommandQueueHw : public CommandQueue {
EventsRequest &eventsRequest,
EventBuilder &externalEventBuilder,
std::unique_ptr<PrintfHandler> &&printfHandler,
CommandStreamReceiver *bcsCsr);
CommandStreamReceiver *bcsCsr,
TagNodeBase *multiRootDeviceSyncNode);
CompletionStamp enqueueCommandWithoutKernel(Surface **surfaces,
size_t surfaceCount,
@ -422,7 +423,7 @@ class CommandQueueHw : public CommandQueue {
TimestampPacketDependencies &timestampPacketDependencies,
const EventsRequest &eventsRequest,
LinearStream *commandStream,
uint32_t commandType, bool queueBlocked);
uint32_t commandType, bool queueBlocked, TagNodeBase *multiRootDeviceEventSync);
void submitCacheFlush(Surface **surfaces,
size_t numSurfaces,
LinearStream *commandStream,
@ -433,6 +434,8 @@ class CommandQueueHw : public CommandQueue {
bool waitForTimestamps(Range<CopyEngineState> copyEnginesToWait, TaskCountType taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override;
MOCKABLE_VIRTUAL bool isCacheFlushForBcsRequired() const;
void processSignalMultiRootDeviceNode(LinearStream *commandStream,
TagNodeBase *node);
protected:
// Hook receiving the command type and the dispatch info of an enqueue.
// This implementation deliberately does nothing.
MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) {}
@ -473,7 +476,7 @@ class CommandQueueHw : public CommandQueue {
blockedCommandsData = std::make_unique<KernelOperation>(commandStream, *gpgpuCsr.getInternalAllocationStorage());
} else {
commandStream = &getCommandStream<GfxFamily, commandType>(*this, csrDependencies, profilingRequired, perfCountersRequired,
blitEnqueue, multiDispatchInfo, surfaces, numSurfaces, isMarkerWithProfiling, eventsRequest.numEventsInWaitList > 0);
blitEnqueue, multiDispatchInfo, surfaces, numSurfaces, isMarkerWithProfiling, eventsRequest.numEventsInWaitList > 0, eventsRequest.outEvent);
}
return commandStream;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
* Copyright (C) 2021-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -45,7 +45,7 @@ bool CommandQueueHw<Family>::isCacheFlushCommand(uint32_t commandType) const {
}
template <>
LinearStream &getCommandStream<Family, CL_COMMAND_RESOURCE_BARRIER>(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo, Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList) {
LinearStream &getCommandStream<Family, CL_COMMAND_RESOURCE_BARRIER>(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo, Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList, cl_event *outEvent) {
size_t expectedSizeCS = 0;
[[maybe_unused]] bool usePostSync = false;
if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {

View File

@ -177,7 +177,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
BlitPropertiesContainer blitPropertiesContainer;
if (this->context->getRootDeviceIndices().size() > 1) {
eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, computeCommandStreamReceiver);
eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, computeCommandStreamReceiver);
}
const bool enqueueWithBlitAuxTranslation = isBlitAuxTranslationRequired(multiDispatchInfo);
@ -226,7 +226,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
}
if (this->context->getRootDeviceIndices().size() > 1) {
TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer<GfxFamily>(commandStream, csrDeps);
TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer<GfxFamily>(commandStream, csrDeps);
}
if (enqueueWithBlitAuxTranslation) {
@ -280,6 +280,17 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
} else if (isMarkerWithPostSyncWrite) {
processDispatchForMarker(*this, &commandStream, eventsRequest, csrDeps);
}
TagNodeBase *multiRootEventSyncStamp = nullptr;
if (eventBuilder.getEvent() && eventBuilder.getEvent()->getContext()->getRootDeviceIndices().size() > 1) {
multiRootEventSyncStamp = eventBuilder.getEvent()->getMultiRootTimestampSyncNode();
if (!blockQueue) {
this->getGpgpuCommandStreamReceiver().makeResident(*multiRootEventSyncStamp->getBaseGraphicsAllocation());
}
processSignalMultiRootDeviceNode(&commandStream, multiRootEventSyncStamp);
if (CL_COMMAND_MARKER == commandType) {
flushDependenciesForNonKernelCommand = true;
}
}
CompletionStamp completionStamp = {CompletionStamp::notReady, taskLevel, 0};
const EnqueueProperties enqueueProperties(false, !multiDispatchInfo.empty(), isCacheFlushCommand(commandType),
@ -382,7 +393,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
eventsRequest,
eventBuilder,
std::move(printfHandler),
nullptr);
nullptr,
multiRootEventSyncStamp);
}
if (deferredTimestampPackets.get()) {
@ -497,7 +509,7 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(CommandS
const MultiDispatchInfo &multiDispatchInfo,
TimestampPacketDependencies &timestampPacketDependencies,
const EventsRequest &eventsRequest, LinearStream *commandStream,
uint32_t commandType, bool queueBlocked) {
uint32_t commandType, bool queueBlocked, TagNodeBase *multiRootDeviceEventSync) {
auto blitDirection = ClBlitProperties::obtainBlitDirection(commandType);
auto blitProperties = ClBlitProperties::constructProperties(blitDirection, blitCommandStreamReceiver,
@ -510,7 +522,7 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(CommandS
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes);
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.barrierNodes);
}
blitProperties.multiRootDeviceEventSync = multiRootDeviceEventSync;
auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0);
blitProperties.outputTimestampPacket = currentTimestampPacketNode;
@ -616,7 +628,20 @@ void CommandQueueHw<GfxFamily>::processDispatchForMarker(CommandQueue &commandQu
HardwareInterface<GfxFamily>::dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
getGpgpuCommandStreamReceiver().makeResident(*hwTimeStamps->getBaseGraphicsAllocation());
}
// Appends a barrier with an immediate-data post-sync operation to commandStream.
// The post-sync target is the node's GPU address plus its context-end offset and
// the value written is the maximum uint64_t.
// Both commandStream and node are dereferenced without a null check.
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::processSignalMultiRootDeviceNode(LinearStream *commandStream,
                                                                 TagNodeBase *node) {
    using SyncCommands = MemorySynchronizationCommands<GfxFamily>;

    const auto &hwInfo = getDevice().getHardwareInfo();

    PipeControlArgs args;
    args.dcFlushEnable = SyncCommands::getDcFlushEnable(true, hwInfo);

    const auto signalAddress = node->getGpuAddress() + node->getContextEndOffset();
    const auto signalValue = std::numeric_limits<uint64_t>::max();

    SyncCommands::addBarrierWithPostSyncOperation(*commandStream,
                                                  PostSyncMode::ImmediateData,
                                                  signalAddress,
                                                  signalValue,
                                                  hwInfo,
                                                  args);
}
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::processDispatchForMarkerWithTimestampPacket(CommandQueue &commandQueue,
LinearStream *commandStream,
@ -901,7 +926,8 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
EventsRequest &eventsRequest,
EventBuilder &externalEventBuilder,
std::unique_ptr<PrintfHandler> &&printfHandler,
CommandStreamReceiver *bcsCsr) {
CommandStreamReceiver *bcsCsr,
TagNodeBase *multiRootDeviceSyncNode) {
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
@ -972,7 +998,8 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
std::move(printfHandler),
preemptionMode,
multiDispatchInfo.peekMainKernel(),
(uint32_t)multiDispatchInfo.size());
(uint32_t)multiDispatchInfo.size(),
multiRootDeviceSyncNode);
}
if (storeTimestampPackets) {
command->setTimestampPacketNode(*timestampPacketContainer, std::move(timestampPacketDependencies));
@ -1274,10 +1301,14 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
}
TimestampPacketDependencies timestampPacketDependencies;
TagNodeBase *multiRootEventSyncStamp = nullptr;
BlitPropertiesContainer blitPropertiesContainer;
CsrDependencies csrDeps;
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, bcsCsr, CsrDependencies::DependenciesType::All);
if (this->context->getRootDeviceIndices().size() > 1) {
eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, bcsCsr);
}
auto allocator = bcsCsr.getTimestampPacketAllocator();
if (!blockQueue) {
@ -1304,6 +1335,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
if (eventBuilder.getEvent()) {
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
}
if (eventBuilder.getEvent() && eventBuilder.getEvent()->getContext()->getRootDeviceIndices().size() > 1) {
multiRootEventSyncStamp = eventBuilder.getEvent()->getMultiRootTimestampSyncNode();
bcsCsr.makeResident(*multiRootEventSyncStamp->getBaseGraphicsAllocation());
}
CompletionStamp completionStamp = {CompletionStamp::notReady, taskLevel, 0};
@ -1320,7 +1355,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
}
blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies,
eventsRequest, gpgpuCommandStream, cmdType, blockQueue));
eventsRequest, gpgpuCommandStream, cmdType, blockQueue, multiRootEventSyncStamp));
if (!blockQueue) {
completionStamp = enqueueCommandWithoutKernel(nullptr, 0, gpgpuCommandStream, gpgpuCommandStreamStart, blocking,
@ -1347,7 +1382,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
updateFromCompletionStamp(completionStamp, eventBuilder.getEvent());
if (blockQueue) {
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, nullptr, &bcsCsr);
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, nullptr, &bcsCsr, multiRootEventSyncStamp);
if (gpgpuSubmission) {
if (DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() == 1) {

View File

@ -88,7 +88,7 @@ class GpgpuWalkerHelper {
template <typename GfxFamily>
struct EnqueueOperation {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
static size_t getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitList);
static size_t getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitList, cl_event *outEvent);
static size_t getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo);
static size_t getSizeRequiredForTimestampPacketWrite();
static size_t getSizeForCacheFlushAfterWalkerCommands(const Kernel &kernel, const CommandQueue &commandQueue);
@ -101,8 +101,8 @@ struct EnqueueOperation {
template <typename GfxFamily, uint32_t eventType>
LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace,
bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo,
Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList) {
size_t expectedSizeCS = EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(eventType, csrDeps, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, blitEnqueue, commandQueue, multiDispatchInfo, isMarkerWithProfiling, eventsInWaitList);
Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList, cl_event *outEvent) {
size_t expectedSizeCS = EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(eventType, csrDeps, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, blitEnqueue, commandQueue, multiDispatchInfo, isMarkerWithProfiling, eventsInWaitList, outEvent);
return commandQueue.getCS(expectedSizeCS);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -165,7 +165,7 @@ size_t GpgpuWalkerHelper<GfxFamily>::getSizeForWaDisableRccRhwoOptimization(cons
}
template <typename GfxFamily>
size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitlist) {
size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitlist, cl_event *outEvent) {
size_t expectedSizeCS = 0;
auto &hwInfo = commandQueue.getDevice().getHardwareInfo();
auto &gfxCoreHelper = commandQueue.getDevice().getGfxCoreHelper();
@ -218,8 +218,14 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
if (DebugManager.flags.GpuScratchRegWriteAfterWalker.get() != -1) {
expectedSizeCS += sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM);
}
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<GfxFamily>(csrDeps);
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer<GfxFamily>(csrDeps);
if (outEvent) {
auto pEvent = castToObjectOrAbort<Event>(*outEvent);
if ((pEvent->getContext()->getRootDeviceIndices().size() > 1) && (!pEvent->isUserEvent())) {
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(hwInfo, false);
}
}
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier(false);
return expectedSizeCS;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -31,6 +31,7 @@ struct HardwareInterfaceWalkerArgs {
size_t localWorkSizes[3] = {};
TagNodeBase *hwTimeStamps = nullptr;
TagNodeBase *hwPerfCounter = nullptr;
TagNodeBase *multiRootDeviceEventStamp = nullptr;
TimestampPacketDependencies *timestampPacketDependencies = nullptr;
TimestampPacketContainer *currentTimestampPacketNodes = nullptr;
const Vec3<size_t> *numberOfWorkgroups = nullptr;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -133,6 +133,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
walkerArgs.currentTimestampPacketNodes);
walkerArgs.currentDispatchIndex = 0;
for (auto &dispatchInfo : multiDispatchInfo) {
dispatchInfo.dispatchInitCommands(*commandStream, walkerArgs.timestampPacketDependencies, commandQueue.getDevice().getHardwareInfo());
walkerArgs.isMainKernel = (dispatchInfo.getKernel() == mainKernel);

View File

@ -49,7 +49,9 @@ Context::Context(
Context::~Context() {
gtpinNotifyContextDestroy((cl_context)this);
if (multiRootDeviceTimestampPacketAllocator.get() != nullptr) {
multiRootDeviceTimestampPacketAllocator.reset();
}
if (smallBufferPoolAllocator.isAggregatedSmallBuffersEnabled(this)) {
smallBufferPoolAllocator.releaseSmallBufferPool();
}
@ -558,5 +560,15 @@ void Context::BufferPoolAllocator::releaseSmallBufferPool() {
delete this->mainStorage;
this->mainStorage = nullptr;
}
// Returns a raw pointer to the allocator held by this context's
// multiRootDeviceTimestampPacketAllocator member, or nullptr when the member is empty.
// Ownership stays with the context.
TagAllocatorBase *Context::getMultiRootDeviceTimestampPacketAllocator() {
    auto *currentAllocator = multiRootDeviceTimestampPacketAllocator.get();
    return currentAllocator;
}
void Context::setMultiRootDeviceTimestampPacketAllocator(std::unique_ptr<TagAllocatorBase> &allocator) {
multiRootDeviceTimestampPacketAllocator = std::move(allocator);
}
std::unique_lock<std::mutex> Context::obtainOwnershipForMultiRootDeviceAllocator() {
return std::unique_lock<std::mutex>(multiRootDeviceAllocatorMtx);
}
} // namespace NEO

View File

@ -37,6 +37,7 @@ class SharingFunctions;
class SVMAllocsManager;
class Program;
class Platform;
class TagAllocatorBase;
template <>
struct OpenCLObjectMapper<_cl_context> {
@ -223,6 +224,9 @@ class Context : public BaseObject<_cl_context> {
BufferPoolAllocator &getBufferPoolAllocator() {
return this->smallBufferPoolAllocator;
}
TagAllocatorBase *getMultiRootDeviceTimestampPacketAllocator();
std::unique_lock<std::mutex> obtainOwnershipForMultiRootDeviceAllocator();
void setMultiRootDeviceTimestampPacketAllocator(std::unique_ptr<TagAllocatorBase> &allocator);
protected:
struct BuiltInKernel {
@ -263,6 +267,8 @@ class Context : public BaseObject<_cl_context> {
uint32_t maxRootDeviceIndex = std::numeric_limits<uint32_t>::max();
cl_bool preferD3dSharedResources = 0u;
ContextType contextType = ContextType::CONTEXT_TYPE_DEFAULT;
std::unique_ptr<TagAllocatorBase> multiRootDeviceTimestampPacketAllocator;
std::mutex multiRootDeviceAllocatorMtx;
bool interopUserSync = false;
bool resolvesRequiredInKernels = false;

View File

@ -131,6 +131,9 @@ Event::~Event() {
if (timeStampNode != nullptr) {
timeStampNode->returnTag();
}
if (multiRootTimeStampSyncNode != nullptr) {
multiRootTimeStampSyncNode->returnTag();
}
if (perfCounterNode != nullptr) {
cmdQueue->getPerfCounters()->deleteQuery(perfCounterNode->getQueryHandleRef());
perfCounterNode->getQueryHandleRef() = {};
@ -883,7 +886,6 @@ TagNodeBase *Event::getHwTimeStampNode() {
}
TagNodeBase *Event::getHwPerfCounterNode() {
if (!perfCounterNode && cmdQueue->getPerfCounters()) {
const uint32_t gpuReportSize = HwPerfCounter::getSize(*(cmdQueue->getPerfCounters()));
perfCounterNode = cmdQueue->getGpgpuCommandStreamReceiver().getEventPerfCountAllocator(gpuReportSize)->getTag();
@ -891,11 +893,27 @@ TagNodeBase *Event::getHwPerfCounterNode() {
return perfCounterNode;
}
// Obtains a tag from the context's multi-root-device timestamp packet allocator,
// stores it as this event's multiRootTimeStampSyncNode, records it in
// multiRootDeviceTimestampPacketContainer and returns it.
// The context allocator and the event's container are created on first use.
// Every call takes a new tag and overwrites multiRootTimeStampSyncNode.
TagNodeBase *Event::getMultiRootTimestampSyncNode() {
    {
        // The context lock only covers the check-and-create of the shared allocator.
        auto allocatorOwnership = getContext()->obtainOwnershipForMultiRootDeviceAllocator();
        if (!getContext()->getMultiRootDeviceTimestampPacketAllocator()) {
            auto createdAllocator = cmdQueue->getGpgpuCommandStreamReceiver().createMultiRootDeviceTimestampPacketAllocator(getContext()->getRootDeviceIndices());
            getContext()->setMultiRootDeviceTimestampPacketAllocator(createdAllocator);
        }
    }

    if (!multiRootDeviceTimestampPacketContainer) {
        multiRootDeviceTimestampPacketContainer = std::make_unique<TimestampPacketContainer>();
    }

    multiRootTimeStampSyncNode = getContext()->getMultiRootDeviceTimestampPacketAllocator()->getTag();
    multiRootDeviceTimestampPacketContainer->add(multiRootTimeStampSyncNode);
    return multiRootTimeStampSyncNode;
}
// Forwards the given container to assignAndIncrementNodesRefCounts() on this
// event's timestampPacketContainer. The member is dereferenced without a null check.
void Event::addTimestampPacketNodes(const TimestampPacketContainer &inputTimestampPacketContainer) {
    auto &eventNodes = *timestampPacketContainer;
    eventNodes.assignAndIncrementNodesRefCounts(inputTimestampPacketContainer);
}
// Returns a raw pointer to the container held by timestampPacketContainer
// (nullptr when the member is empty). Ownership stays with the event.
TimestampPacketContainer *Event::getTimestampPacketNodes() const {
    auto *nodes = timestampPacketContainer.get();
    return nodes;
}
// Returns a raw pointer to the container held by multiRootDeviceTimestampPacketContainer.
// The result is nullptr while the member is empty. Ownership stays with the event.
TimestampPacketContainer *Event::getMultiRootDeviceTimestampPacketNodes() const {
    auto *multiRootNodes = multiRootDeviceTimestampPacketContainer.get();
    return multiRootNodes;
}
bool Event::checkUserEventDependencies(cl_uint numEventsInWaitList, const cl_event *eventWaitList) {
bool userEventsDependencies = false;

View File

@ -115,6 +115,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
void addTimestampPacketNodes(const TimestampPacketContainer &inputTimestampPacketContainer);
TimestampPacketContainer *getTimestampPacketNodes() const;
TimestampPacketContainer *getMultiRootDeviceTimestampPacketNodes() const;
bool isPerfCountersEnabled() const {
return perfCountersEnabled;
@ -129,6 +130,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
}
TagNodeBase *getHwPerfCounterNode();
TagNodeBase *getMultiRootTimestampSyncNode();
std::unique_ptr<FlushStampTracker> flushStamp;
std::atomic<TaskCountType> taskLevel;
@ -384,8 +386,10 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
bool perfCountersEnabled;
TagNodeBase *timeStampNode = nullptr;
TagNodeBase *perfCounterNode = nullptr;
TagNodeBase *multiRootTimeStampSyncNode = nullptr;
std::unique_ptr<TimestampPacketContainer> timestampPacketContainer;
// number of events this event depends on
std::unique_ptr<TimestampPacketContainer> multiRootDeviceTimestampPacketContainer;
std::atomic<int> parentCount;
// event parents
std::vector<Event *> parentEvents;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2022 Intel Corporation
* Copyright (C) 2019-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2022 Intel Corporation
* Copyright (C) 2019-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*

View File

@ -26,7 +26,6 @@ namespace NEO {
void flushDependentCsr(CommandStreamReceiver &dependentCsr, CsrDependencies &csrDeps) {
auto csrOwnership = dependentCsr.obtainUniqueOwnership();
dependentCsr.updateTagFromWait();
csrDeps.taskCountContainer.push_back({dependentCsr.peekTaskCount(), reinterpret_cast<uint64_t>(dependentCsr.getTagAddress())});
}
void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr, CsrDependencies::DependenciesType depsType) const {
@ -60,6 +59,7 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci
if (productHelper.isDcFlushAllowed()) {
if (!dependentCsr.isLatestTaskCountFlushed()) {
flushDependentCsr(dependentCsr, csrDeps);
// csrDeps.taskCountContainer.push_back({dependentCsr.peekTaskCount(), reinterpret_cast<uint64_t>(dependentCsr.getTagAddress())});
currentCsr.makeResident(*dependentCsr.getTagAllocation());
}
}
@ -68,23 +68,22 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci
}
}
void EventsRequest::fillCsrDependenciesForTaskCountContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const {
void EventsRequest::fillCsrDependenciesForRootDevices(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const {
for (cl_uint i = 0; i < this->numEventsInWaitList; i++) {
auto event = castToObjectOrAbort<Event>(this->eventWaitList[i]);
if (event->isUserEvent() || CompletionStamp::notReady == event->peekTaskCount()) {
continue;
}
if (event->getCommandQueue() && event->getCommandQueue()->getDevice().getRootDeviceIndex() != currentCsr.getRootDeviceIndex()) {
auto timestampPacketContainer = event->getMultiRootDeviceTimestampPacketNodes();
if (!timestampPacketContainer || timestampPacketContainer->peekNodes().empty()) {
continue;
}
auto &dependentCsr = event->getCommandQueue()->getGpgpuCommandStreamReceiver();
if (!dependentCsr.isLatestTaskCountFlushed()) {
flushDependentCsr(dependentCsr, csrDeps);
} else {
csrDeps.taskCountContainer.push_back({event->peekTaskCount(), reinterpret_cast<uint64_t>(dependentCsr.getTagAddress())});
}
auto graphicsAllocation = event->getCommandQueue()->getGpgpuCommandStreamReceiver().getTagsMultiAllocation()->getGraphicsAllocation(currentCsr.getRootDeviceIndex());
currentCsr.getResidencyAllocations().push_back(graphicsAllocation);
csrDeps.multiRootTimeStampSyncContainer.push_back(timestampPacketContainer);
}
}
}

View File

@ -25,7 +25,7 @@ struct EventsRequest {
: numEventsInWaitList(numEventsInWaitList), eventWaitList(eventWaitList), outEvent(outEvent) {}
void fillCsrDependenciesForTimestampPacketContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr, CsrDependencies::DependenciesType depsType) const;
void fillCsrDependenciesForTaskCountContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const;
void fillCsrDependenciesForRootDevices(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const;
void setupBcsCsrForOutputEvent(CommandStreamReceiver &bcsCsr) const;
cl_uint numEventsInWaitList;

View File

@ -118,10 +118,11 @@ CompletionStamp &CommandMapUnmap::submit(TaskCountType taskLevel, bool terminate
CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> surfaces,
bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr<PrintfHandler> &&printfHandler,
PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount)
PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount,
TagNodeBase *multiRootDeviceSyncNode)
: Command(commandQueue, kernelOperation), surfaces(std::move(surfaces)), flushDC(flushDC), slmUsed(usesSLM),
commandType(commandType), printfHandler(std::move(printfHandler)), kernel(kernel),
kernelCount(kernelCount), preemptionMode(preemptionMode) {
kernelCount(kernelCount), preemptionMode(preemptionMode), multiRootDeviceSyncNode(multiRootDeviceSyncNode) {
UNRECOVERABLE_IF(nullptr == this->kernel);
kernel->incRefInternal();
}
@ -163,6 +164,9 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
printfHandler->makeResident(commandStreamReceiver);
}
makeTimestampPacketsResident(commandStreamReceiver);
if (multiRootDeviceSyncNode != nullptr) {
commandStreamReceiver.makeResident(*multiRootDeviceSyncNode->getBaseGraphicsAllocation());
}
if (kernelOperation->blitPropertiesContainer.size() > 0) {
CsrDependencies csrDeps;
@ -214,7 +218,7 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
false); // hasRelaxedOrderingDependencies
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
eventsRequest.fillCsrDependenciesForRootDevices(dispatchFlags.csrDependencies, commandStreamReceiver);
}
const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
@ -307,7 +311,7 @@ TaskCountType CommandWithoutKernel::dispatchBlitOperation() {
blitProperties.outputTimestampPacket = currentTimestampPacketNodes->peekNodes()[0];
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
eventsRequest.fillCsrDependenciesForTaskCountContainer(blitProperties.csrDependencies, *bcsCsr);
eventsRequest.fillCsrDependenciesForRootDevices(blitProperties.csrDependencies, *bcsCsr);
}
const auto newTaskCount = bcsCsr->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
@ -389,7 +393,7 @@ CompletionStamp &CommandWithoutKernel::submit(TaskCountType taskLevel, bool term
false); // hasRelaxedOrderingDependencies
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
eventsRequest.fillCsrDependenciesForRootDevices(dispatchFlags.csrDependencies, commandStreamReceiver);
}
const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();

View File

@ -127,7 +127,7 @@ class CommandComputeKernel : public Command {
public:
CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> surfaces,
bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr<PrintfHandler> &&printfHandler,
PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount);
PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount, TagNodeBase *multiRootDeviceSyncNode);
~CommandComputeKernel() override;
@ -146,6 +146,7 @@ class CommandComputeKernel : public Command {
Kernel *kernel;
uint32_t kernelCount;
PreemptionMode preemptionMode;
TagNodeBase *multiRootDeviceSyncNode;
};
class CommandWithoutKernel : public Command {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
* Copyright (C) 2021-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
* Copyright (C) 2021-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
* Copyright (C) 2021-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -19,6 +19,7 @@
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
#include "opencl/test/unit_test/fixtures/buffer_fixture.h"
#include "opencl/test/unit_test/fixtures/image_fixture.h"
#include "opencl/test/unit_test/helpers/cl_hw_parse.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_event.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
@ -193,6 +194,7 @@ class MockCommandStreamReceiverWithFailingFlushBatchedSubmission : public MockCo
// Test command queue whose getGpgpuCommandStreamReceiver() returns the mock
// receiver pointed to by `csr` instead of the queue's own receiver.
// `csr` must be assigned before getGpgpuCommandStreamReceiver() is called,
// because the accessor dereferences it unconditionally.
template <typename GfxFamily>
struct MockCommandQueueHwWithOverwrittenCsr : public CommandQueueHw<GfxFamily> {
    using CommandQueueHw<GfxFamily>::CommandQueueHw;
    // Re-declared in this struct so tests can access the queue's timestamp packet container.
    using CommandQueueHw<GfxFamily>::timestampPacketContainer;
    // Raw pointer that this struct never deletes. Defaults to nullptr so that a
    // forgotten assignment is a null dereference rather than a read through an
    // indeterminate pointer.
    MockCommandStreamReceiverWithFailingFlushBatchedSubmission *csr = nullptr;
    CommandStreamReceiver &getGpgpuCommandStreamReceiver() const override { return *csr; }
};

View File

@ -21,6 +21,7 @@
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_event.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_mdi.h"
using namespace NEO;
@ -970,4 +971,4 @@ HWTEST_F(CommandQueueHwTest, GivenBuiltinKernelWhenBuiltinDispatchInfoBuilderIsP
EXPECT_EQ(builder.paramsToUse.elws.x, dispatchInfo->getEnqueuedWorkgroupSize().x);
EXPECT_EQ(builder.paramsToUse.offset.x, dispatchInfo->getOffset().x);
EXPECT_EQ(builder.paramsToUse.kernel, dispatchInfo->getKernel());
}
}

View File

@ -31,6 +31,7 @@
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
#include "opencl/test/unit_test/mocks/mock_event.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_mdi.h"
#include "opencl/test/unit_test/mocks/mock_program.h"

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
* Copyright (C) 2021-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -557,11 +557,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenTimestamp
MockMultiDispatchInfo multiDispatchInfo(device.get(), std::vector<Kernel *>({kernel1.mockKernel, kernel2.mockKernel}));
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false);
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
size_t sizeWithDisabled = cmdQ.requestedCmdStreamSize;
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false);
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
size_t sizeWithEnabled = cmdQ.requestedCmdStreamSize;
size_t additionalSize = 0u;
@ -669,7 +669,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenAutoLocal
EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer());
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, CsrDependencies(), false, false,
false, *cmdQ.get(), multiDispatchInfo, false, false);
false, *cmdQ.get(), multiDispatchInfo, false, false, nullptr);
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize);
EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS);
@ -738,7 +738,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlin
EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer());
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, CsrDependencies(), false, false,
false, *cmdQ.get(), multiDispatchInfo, false, false);
false, *cmdQ.get(), multiDispatchInfo, false, false, nullptr);
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize);
EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS);

View File

@ -234,7 +234,7 @@ HWTEST_F(EnqueueHandlerTest, givenNonBlitPropertyWhenEnqueueIsBlockedThenDontReg
Surface *surfaces[] = {nullptr};
mockCmdQ->enqueueBlocked(CL_COMMAND_MARKER, surfaces, size_t(0), multiDispatchInfo, timestampPacketDependencies,
blockedCommandsData, enqueuePropertiesForDependencyFlush, eventsRequest,
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), nullptr);
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), nullptr, nullptr);
EXPECT_FALSE(blockedCommandsDataForDependencyFlush->blitEnqueue);
}
@ -267,7 +267,7 @@ HWTEST_F(EnqueueHandlerTest, givenBlitPropertyWhenEnqueueIsBlockedThenRegisterBl
Surface *surfaces[] = {nullptr};
mockCmdQ->enqueueBlocked(CL_COMMAND_READ_BUFFER, surfaces, size_t(0), multiDispatchInfo, timestampPacketDependencies,
blockedCommandsData, enqueuePropertiesForBlitEnqueue, eventsRequest,
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), mockCmdQ->getBcsForAuxTranslation());
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr), mockCmdQ->getBcsForAuxTranslation(), nullptr);
EXPECT_TRUE(blockedCommandsDataForBlitEnqueue->blitEnqueue);
EXPECT_EQ(blitProperties.srcAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->srcAllocation);
EXPECT_EQ(blitProperties.dstAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->dstAllocation);
@ -351,7 +351,7 @@ HWTEST_F(DispatchFlagsBlitTests, givenBlitEnqueueWhenDispatchingCommandsWithoutK
timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());
BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies,
eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false);
eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false, nullptr);
BlitPropertiesContainer blitPropertiesContainer;
blitPropertiesContainer.push_back(blitProperties);
@ -395,7 +395,7 @@ HWTEST_F(DispatchFlagsBlitTests, givenN1EnabledWhenDispatchingWithoutKernelThenA
mockCmdQ->obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, true, bcsCsr);
timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag());
BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies,
eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false);
eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false, nullptr);
BlitPropertiesContainer blitPropertiesContainer;
blitPropertiesContainer.push_back(blitProperties);
@ -441,7 +441,7 @@ HWTEST_F(DispatchFlagsTests, givenMockKernelWhenSettingAdditionalKernelExecInfoT
std::vector<Surface *> v;
pKernel->setAdditionalKernelExecInfo(123u);
std::unique_ptr<CommandComputeKernel> cmd(new CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1));
std::unique_ptr<CommandComputeKernel> cmd(new CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1, nullptr));
cmd->submit(1u, false);
EXPECT_EQ(mockCsr->passedDispatchFlags.additionalKernelExecInfo, 123u);

View File

@ -54,7 +54,7 @@ HWTEST2_F(DispatchFlagsTests, whenSubmittingKernelWithAdditionalKernelExecInfoTh
std::vector<Surface *> v;
pKernel->setAdditionalKernelExecInfo(AdditionalKernelExecInfo::DisableOverdispatch);
std::unique_ptr<CommandComputeKernel> cmd(new CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1));
std::unique_ptr<CommandComputeKernel> cmd(new CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1, nullptr));
cmd->submit(1u, false);
EXPECT_EQ(mockCsr->passedDispatchFlags.additionalKernelExecInfo, AdditionalKernelExecInfo::DisableOverdispatch);

View File

@ -2003,10 +2003,10 @@ HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenEstimatingCommandStream
dispatchInfo.setKernel(mockKernel.mockKernel);
multiDispatchInfo.push(dispatchInfo);
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false);
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
DebugManager.flags.GpuScratchRegWriteAfterWalker.set(1);
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false);
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
EXPECT_EQ(baseCommandStreamSize + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM), extendedCommandStreamSize);
}

View File

@ -1014,8 +1014,8 @@ HWTEST_F(EnqueueKernelTest, givenTimestampWriteEnableWhenMarkerProfilingWithoutW
dispatchInfo.setKernel(mockKernel.mockKernel);
multiDispatchInfo.push(dispatchInfo);
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false);
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false);
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false, nullptr);
EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO<FamilyType>::size + MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier(false), extendedCommandStreamSize);
}
@ -1033,8 +1033,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueKernelTest, givenTimestampWriteEnableOnMulti
dispatchInfo.setKernel(mockKernel.mockKernel);
multiDispatchInfo.push(dispatchInfo);
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false);
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false);
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false, nullptr);
EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO<FamilyType>::size + ImplicitScalingDispatch<FamilyType>::getBarrierSize(csr.peekHwInfo(), false, false), extendedCommandStreamSize);
}
@ -1047,8 +1047,8 @@ HWTEST_F(EnqueueKernelTest, givenTimestampWriteEnableWhenMarkerProfilingWithWait
dispatchInfo.setKernel(mockKernel.mockKernel);
multiDispatchInfo.push(dispatchInfo);
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false);
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, true);
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, true, nullptr);
EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO<FamilyType>::size, extendedCommandStreamSize);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -23,6 +23,7 @@
#include "opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h"
#include "opencl/test/unit_test/fixtures/image_fixture.h"
#include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h"
#include "opencl/test/unit_test/mocks/mock_event.h"
using namespace NEO;
@ -96,7 +97,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenFillingBufferThenHeapsAndCommandBufferCo
auto usedAfterSSH = ssh.getUsed();
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_FILL_BUFFER, CsrDependencies(), false, false,
false, *pCmdQ, multiDispatchInfo, false, false);
false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
@ -149,7 +150,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenCopyingBufferThenHeapsAndCommandBufferCo
auto usedAfterSSH = ssh.getUsed();
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_COPY_BUFFER, CsrDependencies(), false, false,
false, *pCmdQ, multiDispatchInfo, false, false);
false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
@ -203,7 +204,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenReadingBufferNonBlockingThenHeapsAndComm
auto usedAfterSSH = ssh.getUsed();
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, CsrDependencies(), false, false,
false, *pCmdQ, multiDispatchInfo, false, false);
false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
@ -258,7 +259,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenReadingBufferBlockingThenThenHeapsAndCom
auto usedAfterSSH = ssh.getUsed();
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, CsrDependencies(), false, false,
false, *pCmdQ, multiDispatchInfo, false, false);
false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
@ -313,7 +314,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferNonBlockingThenHeapsAndComm
auto usedAfterSSH = ssh.getUsed();
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, CsrDependencies(), false, false,
false, *pCmdQ, multiDispatchInfo, false, false);
false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
@ -365,7 +366,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferBlockingThenHeapsAndCommand
auto usedAfterSSH = ssh.getUsed();
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, CsrDependencies(), false, false,
false, *pCmdQ, multiDispatchInfo, false, false);
false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredDSH(multiDispatchInfo);
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredIOH(multiDispatchInfo);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
@ -380,6 +381,68 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferBlockingThenHeapsAndCommand
EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH);
}
// Builds a context over a single root device and checks that the command-stream size estimate for
// CL_COMMAND_MARKER is the same whether the last argument is nullptr or a pointer to an event
// created on that context.
// NOTE(review): EnableMultiRootDeviceContexts is set without a restore visible in this test —
// confirm the fixture resets debug flags.
HWTEST_F(GetSizeRequiredBufferTest, GivenOutEventForSingleDeviceContextWhenCalculatingCSSizeThenExtraPipeControlIsNotAdded) {
    UltClDeviceFactory singleRootFactory{1, 0};
    DebugManager.flags.EnableMultiRootDeviceContexts.set(true);

    cl_device_id contextDevices[] = {singleRootFactory.rootDevices[0]};
    MockContext pContext(ClDeviceVector(contextDevices, 1));
    MockKernelWithInternals kernelWithInternals(*pContext.getDevices()[0]);

    // One dispatch of the mock kernel, pushed onto a MultiDispatchInfo built for the same kernel.
    DispatchInfo kernelDispatch;
    kernelDispatch.setKernel(kernelWithInternals.mockKernel);
    MultiDispatchInfo multiDispatchInfo(kernelWithInternals.mockKernel);
    multiDispatchInfo.push(kernelDispatch);

    auto outEvent = std::make_unique<MockEvent<Event>>(&pContext, nullptr, 0, 0, 0);
    cl_event outEventHandle = outEvent.get();

    // Same query twice; only the trailing event pointer differs.
    auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(
        CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
    auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(
        CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, &outEventHandle);

    EXPECT_EQ(baseCommandStreamSize, extendedCommandStreamSize);
}
// Builds a context over two root devices and checks that the command-stream size estimate for
// CL_COMMAND_MARKER is the same whether the last argument is nullptr or a pointer to a UserEvent
// created on that context.
// NOTE(review): EnableMultiRootDeviceContexts is set without a restore visible in this test —
// confirm the fixture resets debug flags.
HWTEST_F(GetSizeRequiredBufferTest, GivenUserEventForMultiDeviceContextWhenCalculatingCSSizeThenExtraPipeControlIsNotAdded) {
    UltClDeviceFactory twoRootFactory{2, 0};
    DebugManager.flags.EnableMultiRootDeviceContexts.set(true);

    cl_device_id contextDevices[] = {twoRootFactory.rootDevices[0],
                                     twoRootFactory.rootDevices[1]};
    MockContext pContext(ClDeviceVector(contextDevices, 2));
    MockKernelWithInternals kernelWithInternals(*pContext.getDevices()[0]);

    // One dispatch of the mock kernel, pushed onto a MultiDispatchInfo built for the same kernel.
    DispatchInfo kernelDispatch;
    kernelDispatch.setKernel(kernelWithInternals.mockKernel);
    MultiDispatchInfo multiDispatchInfo(kernelWithInternals.mockKernel);
    multiDispatchInfo.push(kernelDispatch);

    auto userEvent = std::make_unique<UserEvent>(&pContext);
    cl_event userEventHandle = userEvent.get();

    // Same query twice; only the trailing event pointer differs.
    auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(
        CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
    auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(
        CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, &userEventHandle);

    EXPECT_EQ(baseCommandStreamSize, extendedCommandStreamSize);
}
// Builds a context over two root devices and checks that passing a pointer to a (non-user) event
// created on that context as the last argument grows the CL_COMMAND_MARKER command-stream size
// estimate by exactly getSizeForBarrierWithPostSyncOperation() for the first context device.
// NOTE(review): EnableMultiRootDeviceContexts is set without a restore visible in this test —
// confirm the fixture resets debug flags.
HWTEST_F(GetSizeRequiredBufferTest, GivenOutEventForMultiDeviceContextWhenCalculatingCSSizeThenExtraPipeControlIsAdded) {
    UltClDeviceFactory twoRootFactory{2, 0};
    DebugManager.flags.EnableMultiRootDeviceContexts.set(true);

    cl_device_id contextDevices[] = {twoRootFactory.rootDevices[0],
                                     twoRootFactory.rootDevices[1]};
    MockContext pContext(ClDeviceVector(contextDevices, 2));
    MockKernelWithInternals kernelWithInternals(*pContext.getDevices()[0]);

    // One dispatch of the mock kernel, pushed onto a MultiDispatchInfo built for the same kernel.
    DispatchInfo kernelDispatch;
    kernelDispatch.setKernel(kernelWithInternals.mockKernel);
    MultiDispatchInfo multiDispatchInfo(kernelWithInternals.mockKernel);
    multiDispatchInfo.push(kernelDispatch);

    auto outEvent = std::make_unique<MockEvent<Event>>(&pContext, nullptr, 0, 0, 0);
    cl_event outEventHandle = outEvent.get();

    // Same query twice; only the trailing event pointer differs.
    auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(
        CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
    auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(
        CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, &outEventHandle);

    EXPECT_EQ(baseCommandStreamSize + MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(pContext.getDevices()[0]->getHardwareInfo(), false), extendedCommandStreamSize);
}
HWTEST_F(GetSizeRequiredBufferTest, givenMultipleKernelRequiringSshWhenTotalSizeIsComputedThenItIsProperlyAligned) {
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
pCmdQ->getClDevice());

View File

@ -1903,7 +1903,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelWhenItIsUnblocke
blockedCommandsData->setHeaps(dsh, ioh, ssh);
std::vector<Surface *> surfaces;
event->setCommand(std::make_unique<CommandComputeKernel>(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1));
event->setCommand(std::make_unique<CommandComputeKernel>(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1, nullptr));
event->submitCommand(false);
EXPECT_EQ(numGrfRequired, csr->savedDispatchFlags.numGrfRequired);
@ -1948,7 +1948,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelWhenInitializeBc
auto blockedCommandsData = std::make_unique<KernelOperation>(cmdStream, *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
std::vector<Surface *> surfaces;
event->setCommand(std::make_unique<CommandComputeKernel>(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1));
event->setCommand(std::make_unique<CommandComputeKernel>(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1, nullptr));
event->submitCommand(false);
EXPECT_FALSE(pCmdQ->isCsrLocked);
}

View File

@ -7,6 +7,7 @@
#include "shared/source/command_stream/wait_status.h"
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_timestamp_container.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/hw_test.h"
@ -14,6 +15,7 @@
#include "opencl/source/event/user_event.h"
#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h"
#include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h"
#include "opencl/test/unit_test/mocks/mock_event.h"
#include "opencl/test/unit_test/mocks/mock_kernel.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
#include "opencl/test/unit_test/test_macros/test_checks_ocl.h"
@ -45,12 +47,18 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMu
MockGraphicsAllocation svmAlloc(svmPtr, svmSize);
Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
auto node1 = event1.getMultiRootTimestampSyncNode();
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20);
auto node3 = event3.getMultiRootTimestampSyncNode();
Event event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
auto node4 = event4.getMultiRootTimestampSyncNode();
Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7);
auto node5 = event5.getMultiRootTimestampSyncNode();
UserEvent userEvent1(&pCmdQ1->getContext());
userEvent1.getMultiRootTimestampSyncNode();
UserEvent userEvent2(&pCmdQ2->getContext());
userEvent2.getMultiRootTimestampSyncNode();
userEvent1.setStatus(CL_COMPLETE);
userEvent2.setStatus(CL_COMPLETE);
@ -87,12 +95,12 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMu
EXPECT_EQ(2u, semaphores.size());
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(node4->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress());
auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
EXPECT_EQ(7u, semaphoreCmd1->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(node5->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress());
}
{
@ -115,12 +123,12 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMu
EXPECT_EQ(2u, semaphores.size());
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(node1->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress());
auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(node3->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress());
}
alignedFree(svmPtr);
}
@ -147,17 +155,24 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo
cl_device_id devices[] = {device1, device2, device3};
auto context = std::make_unique<MockContext>(ClDeviceVector(devices, 3), false);
auto mockTagAllocator = std::make_unique<MockTagAllocator<>>(context->getRootDeviceIndices(), device1->getExecutionEnvironment()->memoryManager.get(), 10u);
std::unique_ptr<TagAllocatorBase> uniquePtr(mockTagAllocator.release());
context->setMultiRootDeviceTimestampPacketAllocator(uniquePtr);
auto pCmdQ1 = context->getSpecialQueue(1u);
auto pCmdQ2 = context->getSpecialQueue(2u);
auto pCmdQ3 = context->getSpecialQueue(3u);
Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
auto node1 = event1.getMultiRootTimestampSyncNode();
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20);
auto node3 = event3.getMultiRootTimestampSyncNode();
Event event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
auto node4 = event4.getMultiRootTimestampSyncNode();
Event event5(pCmdQ3, CL_COMMAND_NDRANGE_KERNEL, 7, 21);
auto node5 = event5.getMultiRootTimestampSyncNode();
Event event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7);
auto node6 = event6.getMultiRootTimestampSyncNode();
UserEvent userEvent1(&pCmdQ1->getContext());
UserEvent userEvent2(&pCmdQ2->getContext());
@ -190,16 +205,16 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo
EXPECT_EQ(3u, semaphores.size());
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(node4->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress());
auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
EXPECT_EQ(21u, semaphoreCmd1->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ3->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(node5->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress());
auto semaphoreCmd2 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[2]));
EXPECT_EQ(7u, semaphoreCmd2->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd2->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(node6->getContextEndAddress(0u)), semaphoreCmd2->getSemaphoreGraphicsAddress());
}
{
@ -215,16 +230,16 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo
EXPECT_EQ(3u, semaphores.size());
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(node1->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress());
auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(node3->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress());
auto semaphoreCmd2 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[2]));
EXPECT_EQ(21u, semaphoreCmd2->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ3->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd2->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(node5->getContextEndAddress(0u)), semaphoreCmd2->getSemaphoreGraphicsAddress());
}
{
@ -249,8 +264,8 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo
EXPECT_EQ(1u, semaphores.size());
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(node1->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress());
}
}
@ -286,11 +301,16 @@ HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnviro
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
event1.getMultiRootTimestampSyncNode();
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 1, 6);
event3.getMultiRootTimestampSyncNode();
Event event4(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20);
event4.getMultiRootTimestampSyncNode();
Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
event5.getMultiRootTimestampSyncNode();
Event event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7);
event6.getMultiRootTimestampSyncNode();
UserEvent userEvent1(&pCmdQ1->getContext());
UserEvent userEvent2(&pCmdQ2->getContext());
@ -316,10 +336,10 @@ HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnviro
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr);
CsrDependencies csrDeps;
eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ1->getGpgpuCommandStreamReceiver());
eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, pCmdQ1->getGpgpuCommandStreamReceiver());
EXPECT_EQ(0u, csrDeps.taskCountContainer.size());
EXPECT_EQ(0u, TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<FamilyType>(csrDeps));
EXPECT_EQ(0u, csrDeps.multiRootTimeStampSyncContainer.size());
EXPECT_EQ(0u, TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer<FamilyType>(csrDeps));
}
{
@ -342,10 +362,10 @@ HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnviro
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr);
CsrDependencies csrDeps;
eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ2->getGpgpuCommandStreamReceiver());
eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, pCmdQ2->getGpgpuCommandStreamReceiver());
EXPECT_EQ(3u, csrDeps.taskCountContainer.size());
EXPECT_EQ(3u * sizeof(MI_SEMAPHORE_WAIT), TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<FamilyType>(csrDeps));
EXPECT_EQ(3u, csrDeps.multiRootTimeStampSyncContainer.size());
EXPECT_EQ(3u * sizeof(MI_SEMAPHORE_WAIT), TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer<FamilyType>(csrDeps));
}
}
@ -376,6 +396,7 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
cl_event outputEvent2{};
auto currentCsUsedCmdq1 = pCmdQ1->getCS(0).getUsed();
pCmdQ2->enqueueReadBuffer(buffer, CL_FALSE, 0, MemoryConstants::pageSize, hostPtr, nullptr,
1,
&outputEvent1,
@ -399,14 +420,12 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
nullptr);
{
HardwareParse csHwParser;
csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0));
csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0), currentCsUsedCmdq1);
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());
EXPECT_EQ(0u, semaphores.size());
}
userEvent1.setStatus(CL_COMPLETE);
event1->release();
event2->release();
pCmdQ1->finish();
pCmdQ2->finish();
{
@ -417,7 +436,7 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
EXPECT_EQ(1u, semaphores.size());
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress());
EXPECT_EQ(reinterpret_cast<uint64_t>(event2->getMultiRootDeviceTimestampPacketNodes()->peekNodes().at(0)->getContextEndAddress(0u)), semaphoreCmd->getSemaphoreGraphicsAddress());
}
{
HardwareParse csHwParser;
@ -426,9 +445,11 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
EXPECT_EQ(1u, semaphores.size());
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
EXPECT_EQ(0u, semaphoreCmd->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(event1->getMultiRootDeviceTimestampPacketNodes()->peekNodes().at(0)->getContextEndAddress(0u)), semaphoreCmd->getSemaphoreGraphicsAddress());
}
event1->release();
event2->release();
buffer->release();
}
@ -458,14 +479,14 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
char hostPtr[MemoryConstants::pageSize]{};
cl_event outputEvent2{};
auto currentCsUsed = pCmdQ1->getCS(0).getUsed();
pCmdQ1->enqueueReadBuffer(buffer, CL_FALSE, 0, MemoryConstants::pageSize, hostPtr, nullptr,
1,
&outputEvent1,
&outputEvent2);
{
HardwareParse csHwParser;
csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0));
csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0), currentCsUsed);
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());
EXPECT_EQ(0u, semaphores.size());
@ -482,7 +503,7 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
nullptr);
{
HardwareParse csHwParser;
csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0));
csHwParser.parseCommands<FamilyType>(pCmdQ1->getCS(0), currentCsUsed);
auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(csHwParser.cmdList.begin(), csHwParser.cmdList.end());
EXPECT_EQ(0u, semaphores.size());
@ -590,9 +611,6 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
EXPECT_EQ(0u, semaphores.size());
}
userEvent1.setStatus(CL_COMPLETE);
event1->release();
event2->release();
event3->release();
pCmdQ1->finish();
pCmdQ2->finish();
@ -604,7 +622,8 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
EXPECT_EQ(1u, semaphores.size());
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress());
auto node = event2->getMultiRootDeviceTimestampPacketNodes()->peekNodes().at(0);
EXPECT_EQ(node->getGpuAddress() + node->getContextEndOffset(), semaphoreCmd->getSemaphoreGraphicsAddress());
}
{
HardwareParse csHwParser;
@ -620,8 +639,9 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
EXPECT_EQ(2u, semaphores.size());
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
EXPECT_EQ(0u, semaphoreCmd0->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword());
auto node = event1->getMultiRootDeviceTimestampPacketNodes()->peekNodes().at(0);
EXPECT_EQ(node->getGpuAddress() + node->getContextEndOffset(), semaphoreCmd0->getSemaphoreGraphicsAddress());
}
{
HardwareParse csHwParser;
@ -630,6 +650,9 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
EXPECT_LE(1u, semaphores.size());
}
event1->release();
event2->release();
event3->release();
buffer->release();
pCmdQ1->release();
pCmdQ2->release();
@ -961,3 +984,73 @@ HWTEST_F(BcsCrossDeviceMigrationTests, givenBufferWithMultiStorageWhenEnqueueRea
EXPECT_EQ(buffer.get(), cmdQueue->migrateMultiGraphicsAllocationsReceivedOperationParams.srcMemObj);
}
// Checks that fillCsrDependenciesForRootDevices() leaves
// multiRootTimeStampSyncContainer empty when getMultiRootTimestampSyncNode()
// was never called on any event in the wait list.
HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyDoNotHaveMultiRootSyncNodeThenCsrDepsDoesNotHaveAnyMultiRootSyncContainer) {
    // Events bound to pCmdQ1, to pCmdQ2, and one (event2) without any command queue.
    Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
    Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
    Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 1, 6);
    Event event4(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20);
    Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
    Event event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7);

    // Both user events are completed; only userEvent1 is placed in the wait list.
    UserEvent userEvent1(&pCmdQ1->getContext());
    UserEvent userEvent2(&pCmdQ2->getContext());
    userEvent1.setStatus(CL_COMPLETE);
    userEvent2.setStatus(CL_COMPLETE);

    cl_event waitList[] = {&event1, &event2, &event3, &event4, &event5, &event6, &userEvent1};
    const cl_uint waitListSize = sizeof(waitList) / sizeof(waitList[0]);

    // Dependencies are collected from the point of view of pCmdQ2's CSR.
    EventsRequest request(waitListSize, waitList, nullptr);
    CsrDependencies dependencies;
    request.fillCsrDependenciesForRootDevices(dependencies, pCmdQ2->getGpgpuCommandStreamReceiver());

    EXPECT_EQ(0u, dependencies.multiRootTimeStampSyncContainer.size());
}
// Checks that fillCsrDependenciesForRootDevices() leaves
// multiRootTimeStampSyncContainer empty when the events in the wait list carry
// only default-constructed TimestampPacketContainer objects (no node is added
// to them in this test) or no container at all.
HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyDoNotHaveMultiRootSyncNodeContainersThenCsrDepsDoesNotHaveAnyMultiRootSyncContainer) {
    // Gives an event a freshly default-constructed multi-root container.
    auto attachEmptySyncContainer = [](MockEvent<Event> &event) {
        event.multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer());
    };

    MockEvent<Event> event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
    attachEmptySyncContainer(event1);
    // event2 has no command queue and its container is left untouched.
    MockEvent<Event> event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
    MockEvent<Event> event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20);
    attachEmptySyncContainer(event3);
    MockEvent<Event> event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
    attachEmptySyncContainer(event4);
    MockEvent<Event> event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
    attachEmptySyncContainer(event5);
    MockEvent<Event> event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7);
    attachEmptySyncContainer(event6);

    UserEvent userEvent1(&pCmdQ1->getContext());
    userEvent1.setStatus(CL_COMPLETE);

    cl_event waitList[] = {&event1, &event2, &event3, &event4, &event5, &event6, &userEvent1};
    const cl_uint waitListSize = sizeof(waitList) / sizeof(waitList[0]);

    // Dependencies are collected from the point of view of pCmdQ2's CSR.
    EventsRequest request(waitListSize, waitList, nullptr);
    CsrDependencies dependencies;
    request.fillCsrDependenciesForRootDevices(dependencies, pCmdQ2->getGpgpuCommandStreamReceiver());

    EXPECT_EQ(0u, dependencies.multiRootTimeStampSyncContainer.size());
}

View File

@ -11,6 +11,7 @@
#include "shared/source/command_stream/wait_status.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/logical_state_helper.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h"

View File

@ -1791,4 +1791,4 @@ HWTEST_F(BcsTests, givenHostPtrToImageWhenBlitBufferIsCalledThenBlitCmdIsFound)
hwParser.parseCommands<FamilyType>(csr.commandStream, 0);
auto cmdIterator = find<typename FamilyType::XY_BLOCK_COPY_BLT *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
EXPECT_NE(hwParser.cmdList.end(), cmdIterator);
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -79,7 +79,7 @@ TEST(EventBuilder, givenVirtualEventWithCommandThenFinalizeAddChild) {
public:
using CommandComputeKernel::eventsWaitlist;
MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> &surfaces, Kernel *kernel)
: CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {}
: CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0, nullptr) {}
};
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
@ -129,7 +129,7 @@ TEST(EventBuilder, givenVirtualEventWithSubmittedCommandAsParentThenFinalizeNotA
public:
using CommandComputeKernel::eventsWaitlist;
MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> &surfaces, Kernel *kernel)
: CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {}
: CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0, nullptr) {}
};
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));

View File

@ -485,7 +485,7 @@ TEST_F(InternalsEventTest, GivenSubmitCommandFalseWhenSubmittingCommandsThenRefA
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
v.push_back(bufferSurf);
auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1);
auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1, nullptr);
event.setCommand(std::unique_ptr<Command>(cmd));
auto taskLevelBefore = csr.peekTaskLevel();
@ -528,7 +528,7 @@ TEST_F(InternalsEventTest, GivenSubmitCommandTrueWhenSubmittingCommandsThenRefAp
NullSurface *surface = new NullSurface;
v.push_back(surface);
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1);
auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1, nullptr);
event.setCommand(std::unique_ptr<Command>(cmd));
auto taskLevelBefore = csr.peekTaskLevel();
@ -579,7 +579,7 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut
std::vector<Surface *> v;
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1);
auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1, nullptr);
event.setCommand(std::unique_ptr<Command>(cmd));
event.submitCommand(false);
@ -631,7 +631,7 @@ TEST_F(InternalsEventTest, givenGpuHangOnCmdQueueWaitFunctionAndBlockedKernelWit
std::vector<Surface *> v;
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1);
auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1, nullptr);
event.setCommand(std::unique_ptr<Command>(cmd));
event.submitCommand(false);
@ -680,7 +680,7 @@ TEST_F(InternalsEventTest, givenGpuHangOnPrintingEnqueueOutputAndBlockedKernelWi
std::vector<Surface *> v;
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1);
auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1, nullptr);
event.setCommand(std::unique_ptr<Command>(cmd));
event.submitCommand(false);
@ -1169,7 +1169,7 @@ HWTEST_F(EventTest, givenVirtualEventWhenCommandSubmittedThenLockCsrOccurs) {
public:
using CommandComputeKernel::eventsWaitlist;
MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> &surfaces, Kernel *kernel)
: CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {}
: CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0, nullptr) {}
};
class MockEvent : public Event {
public:
@ -1750,7 +1750,7 @@ HWTEST_F(InternalsEventTest, givenAbortedCommandWhenSubmitCalledThenDontUpdateFl
blockedCommandsData->setHeaps(dsh, ioh, ssh);
PreemptionMode preemptionMode = pDevice->getPreemptionMode();
std::vector<Surface *> v;
auto cmd = new CommandComputeKernel(*pCmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1);
auto cmd = new CommandComputeKernel(*pCmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1, nullptr);
event->setCommand(std::unique_ptr<Command>(cmd));
FlushStamp expectedFlushStamp = 0;
@ -1893,3 +1893,35 @@ TEST(EventTimestampTest, givenEnableTimestampWaitWhenCheckIsTimestampWaitEnabled
EXPECT_TRUE(event.isWaitForTimestampsEnabled());
}
}
// When the context already holds a multi-root timestamp packet allocator,
// requesting a sync node from an event must leave that allocator in place.
TEST(MultiRootEvent, givenContextWithMultiRootTagAllocatorWhenEventGetsTagThenNewAllocatorIsNotCreated) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
    MockContext context{};
    MockCommandQueue cmdQ(&context, clDevice.get(), 0, false);

    // Create an allocator via the queue's CSR and remember its raw address
    // before passing it to the context.
    auto &csr = cmdQ.getGpgpuCommandStreamReceiver();
    auto preinstalledAllocator = csr.createMultiRootDeviceTimestampPacketAllocator(context.getRootDeviceIndices());
    auto expectedAllocator = preinstalledAllocator.get();
    context.setMultiRootDeviceTimestampPacketAllocator(preinstalledAllocator);

    MockEvent<Event> event{&cmdQ, CL_COMMAND_MARKER, 0, 0};
    event.getMultiRootTimestampSyncNode();

    // The context must still report the very same allocator instance.
    EXPECT_EQ(expectedAllocator, context.getMultiRootDeviceTimestampPacketAllocator());
}
// When the context has no multi-root timestamp packet allocator yet,
// requesting a sync node from an event must result in the context having one.
TEST(MultiRootEvent, givenContextWithoutMultiRootTagAllocatorWhenEventGetsTagThenNewAllocatorIsCreated) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
    MockContext context{};
    MockCommandQueue cmdQ(&context, clDevice.get(), 0, false);

    // Precondition: nothing has been installed on the context so far.
    EXPECT_EQ(context.getMultiRootDeviceTimestampPacketAllocator(), nullptr);

    MockEvent<Event> event{&cmdQ, CL_COMMAND_MARKER, 0, 0};
    event.getMultiRootTimestampSyncNode();

    // Postcondition: the context now exposes an allocator.
    EXPECT_NE(context.getMultiRootDeviceTimestampPacketAllocator(), nullptr);
}
// The event's multi-root node container is absent until the first
// getMultiRootTimestampSyncNode() call, and a second call must keep the same
// container object.
TEST(MultiRootEvent, givenEventWithTagWhenEventGetsNewTagThenNewTagContainerIsNotCreated) {
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
    MockContext context{};
    MockCommandQueue cmdQ(&context, clDevice.get(), 0, false);
    MockEvent<Event> event{&cmdQ, CL_COMMAND_MARKER, 0, 0};

    // No container before any node has been requested.
    EXPECT_EQ(event.getMultiRootDeviceTimestampPacketNodes(), nullptr);

    // First request: a container becomes available.
    event.getMultiRootTimestampSyncNode();
    auto containerAfterFirstCall = event.getMultiRootDeviceTimestampPacketNodes();
    EXPECT_NE(containerAfterFirstCall, nullptr);

    // Second request: the container pointer is unchanged.
    event.getMultiRootTimestampSyncNode();
    EXPECT_EQ(containerAfterFirstCall, event.getMultiRootDeviceTimestampPacketNodes());
}

View File

@ -2435,7 +2435,7 @@ HWTEST_F(GTPinTests, givenGtPinInitializedWhenSubmittingKernelCommandThenFlushed
gtpinNotifyKernelSubmit(kernel.mockMultiDeviceKernel, mockCmdQ.get());
std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1));
std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1, nullptr));
CompletionStamp stamp = command->submit(20, false);
ASSERT_EQ(1u, kernelExecQueue.size());

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -154,7 +154,7 @@ TEST(CommandTest, givenWaitlistRequestWhenCommandComputeKernelIsCreatedThenMakeL
public:
using CommandComputeKernel::eventsWaitlist;
MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> &surfaces, Kernel *kernel)
: CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {}
: CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0, nullptr) {}
};
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
@ -291,7 +291,7 @@ HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectD
for (auto &surface : surfaces) {
requiresCoherency |= surface->IsCoherent;
}
std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1));
std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1, nullptr));
command->submit(20, false);
EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode);
@ -339,7 +339,7 @@ HWTEST_F(DispatchFlagsTests, givenClCommandCopyImageWhenSubmitThenFlushTextureCa
for (auto &surface : surfaces) {
requiresCoherency |= surface->IsCoherent;
}
std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, commandType, nullptr, preemptionMode, kernel, 1));
std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, commandType, nullptr, preemptionMode, kernel, 1, nullptr));
command->submit(20, false);
EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode);
@ -425,7 +425,7 @@ HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectD
bool flushDC = false;
bool slmUsed = false;
bool ndRangeKernel = false;
std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1));
std::unique_ptr<Command> command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1, nullptr));
command->submit(20, false);
EXPECT_TRUE(mockCsr->passedDispatchFlags.epilogueRequired);

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -34,11 +34,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWriteEnabl
auto mockCmdQHw = std::make_unique<MockCommandQueueHw<FamilyType>>(context, device.get(), nullptr);
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false);
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
auto sizeWithDisabled = mockCmdQHw->requestedCmdStreamSize;
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false);
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize;
auto extendedSize = sizeWithDisabled + sizeof(typename FamilyType::PIPE_CONTROL);
@ -52,7 +52,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndOoqWhenEstimat
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, CsrDependencies(), false, false,
false, multiDispatchInfo, nullptr, 0, false, false);
false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
auto sizeWithDisabled = mockCmdQHw->requestedCmdStreamSize;
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
@ -82,7 +82,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndOoqWhenEstimat
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(
csrDeps, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false);
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize;
size_t sizeForNodeDependency = 0;
@ -143,7 +143,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStr
auto mockCmdQHw = std::make_unique<MockCommandQueueHw<FamilyType>>(context, device.get(), nullptr);
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false);
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
auto sizeWithDisabled = mockCmdQHw->requestedCmdStreamSize;
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
@ -172,7 +172,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStr
CsrDependencies csrDeps;
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false);
getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize;
size_t sizeForNodeDependency = 0;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2022 Intel Corporation
* Copyright (C) 2019-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -212,7 +212,7 @@ HWTEST2_F(KernelWithCacheFlushTests, givenCacheFlushRequiredWhenEstimatingThenAd
{
EXPECT_FALSE(mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ));
initialSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false);
initialSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false, nullptr);
}
{
@ -226,7 +226,7 @@ HWTEST2_F(KernelWithCacheFlushTests, givenCacheFlushRequiredWhenEstimatingThenAd
ultCsr.multiOsContextCapable = false;
EXPECT_TRUE(mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ));
sizeWithCacheFlush = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false);
sizeWithCacheFlush = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false, nullptr);
}
EXPECT_EQ(initialSize + expectedDiff, sizeWithCacheFlush);

View File

@ -504,6 +504,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
using BaseClass::CommandStreamReceiver;
// Mock override: this receiver exposes no timestamp packet allocator.
TagAllocatorBase *getTimestampPacketAllocator() override {
    return nullptr;
}
std::unique_ptr<TagAllocatorBase> createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) override { return std::unique_ptr<TagAllocatorBase>(nullptr); }
// Mock override: performs no work and unconditionally reports success.
SubmissionStatus flushTagUpdate() override {
    return SubmissionStatus::SUCCESS;
}
// Mock override: intentionally empty, does nothing when called.
void updateTagFromWait() override {
}

View File

@ -766,11 +766,11 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBufferOperationWithoutKernelWhenEstimati
auto &hwInfo = cmdQ->getDevice().getHardwareInfo();
auto readBufferCmdsSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, csrDependencies, false, false,
true, *cmdQ, multiDispatchInfo, false, false);
true, *cmdQ, multiDispatchInfo, false, false, nullptr);
auto writeBufferCmdsSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, csrDependencies, false, false,
true, *cmdQ, multiDispatchInfo, false, false);
true, *cmdQ, multiDispatchInfo, false, false, nullptr);
auto copyBufferCmdsSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_COPY_BUFFER, csrDependencies, false, false,
true, *cmdQ, multiDispatchInfo, false, false);
true, *cmdQ, multiDispatchInfo, false, false, nullptr);
auto expectedSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<FamilyType>();
if (cmdQ->isCacheFlushForBcsRequired()) {

View File

@ -249,6 +249,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
using BaseClass::latestSentEnqueueType;
using BaseClass::obtainCommandStream;
using BaseClass::obtainNewTimestampPacketNodes;
using BaseClass::processDispatchForKernels;
using BaseClass::requiresCacheFlushAfterWalker;
using BaseClass::throttle;
using BaseClass::timestampPacketContainer;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -39,6 +39,7 @@ struct MockEvent : public BaseEventType {
using Event::calculateSubmitTimestampData;
using Event::isWaitForTimestampsEnabled;
using Event::magic;
using Event::multiRootDeviceTimestampPacketContainer;
using Event::queueTimeStamp;
using Event::submitTimeStamp;
using Event::timestampPacketContainer;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -71,13 +71,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndFor
MultiDispatchInfo multiDispatchInfo(&kernel);
auto &commandStreamNDRangeKernel = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(), true, false, false,
multiDispatchInfo, nullptr, 0, false, false);
multiDispatchInfo, nullptr, 0, false, false, nullptr);
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, true, false, *pCmdQ, &kernel, {});
EXPECT_GE(expectedSizeCS, requiredSize);
EXPECT_GE(commandStreamNDRangeKernel.getAvailableSpace(), requiredSize);
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, CsrDependencies(), true, false, false,
multiDispatchInfo, nullptr, 0, false, false);
multiDispatchInfo, nullptr, 0, false, false, nullptr);
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_TASK, true, false, *pCmdQ, &kernel, {});
EXPECT_GE(expectedSizeCS, requiredSize);
EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize);
@ -93,13 +93,13 @@ HWTEST_F(ProfilingTests, GivenCommandQueueWithProfilingAndForWorkloadWithNoKerne
MultiDispatchInfo multiDispatchInfo(nullptr);
auto &commandStreamMigrateMemObjects = getCommandStream<FamilyType, CL_COMMAND_MIGRATE_MEM_OBJECTS>(*pCmdQ, CsrDependencies(),
true, false, false,
multiDispatchInfo, nullptr, 0, false, false);
multiDispatchInfo, nullptr, 0, false, false, nullptr);
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_MIGRATE_MEM_OBJECTS, true, false, *pCmdQ, nullptr, {});
EXPECT_GE(expectedSizeCS, requiredSize);
EXPECT_GE(commandStreamMigrateMemObjects.getAvailableSpace(), requiredSize);
auto &commandStreamMarker = getCommandStream<FamilyType, CL_COMMAND_MARKER>(*pCmdQ, CsrDependencies(), true,
false, false, multiDispatchInfo, nullptr, 0, false, false);
false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr);
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_MARKER, true, false, *pCmdQ, nullptr, {});
EXPECT_GE(expectedSizeCS, requiredSize);
EXPECT_GE(commandStreamMarker.getAvailableSpace(), requiredSize);
@ -121,9 +121,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndFor
multiDispatchInfo.push(dispatchInfo);
multiDispatchInfo.push(dispatchInfo);
auto &commandStreamTask = getCommandStream<FamilyType, CL_COMMAND_TASK>(*pCmdQ, CsrDependencies(), true, false, false,
multiDispatchInfo, nullptr, 0, false, false);
multiDispatchInfo, nullptr, 0, false, false, nullptr);
auto expectedSizeCS = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_TASK, CsrDependencies(), true, false,
false, *pCmdQ, multiDispatchInfo, false, false);
false, *pCmdQ, multiDispatchInfo, false, false, nullptr);
EXPECT_GE(expectedSizeCS, requiredSize);
EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize);
}
@ -741,13 +741,13 @@ HWTEST_F(ProfilingWithPerfCountersTests, GivenCommandQueueWithProfilingPerfCount
MultiDispatchInfo multiDispatchInfo(nullptr);
auto &commandStreamMigrateMemObjects = getCommandStream<FamilyType, CL_COMMAND_MIGRATE_MEM_OBJECTS>(*pCmdQ, CsrDependencies(),
true, true, false, multiDispatchInfo,
nullptr, 0, false, false);
nullptr, 0, false, false, nullptr);
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_MIGRATE_MEM_OBJECTS, true, true, *pCmdQ, nullptr, {});
EXPECT_GE(expectedSizeCS, requiredSize);
EXPECT_GE(commandStreamMigrateMemObjects.getAvailableSpace(), requiredSize);
auto &commandStreamMarker = getCommandStream<FamilyType, CL_COMMAND_MARKER>(*pCmdQ, CsrDependencies(), true, true, false,
multiDispatchInfo, nullptr, 0, false, false);
multiDispatchInfo, nullptr, 0, false, false, nullptr);
expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_MARKER, true, true, *pCmdQ, nullptr, {});
EXPECT_GE(expectedSizeCS, requiredSize);
EXPECT_GE(commandStreamMarker.getAvailableSpace(), requiredSize);

View File

@ -243,10 +243,14 @@ PVCTEST_F(PvcMultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEvent
MockGraphicsAllocation svmAlloc(svmPtr, svmSize);
Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
auto node1 = event1.getMultiRootTimestampSyncNode();
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20);
auto node3 = event3.getMultiRootTimestampSyncNode();
Event event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4);
auto node4 = event4.getMultiRootTimestampSyncNode();
Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7);
auto node5 = event5.getMultiRootTimestampSyncNode();
UserEvent userEvent1(&pCmdQ1->getContext());
UserEvent userEvent2(&pCmdQ2->getContext());
@ -285,12 +289,12 @@ PVCTEST_F(PvcMultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEvent
EXPECT_EQ(3u, semaphores.size());
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(node4->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress());
auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
EXPECT_EQ(7u, semaphoreCmd1->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(node5->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress());
}
{
@ -313,12 +317,12 @@ PVCTEST_F(PvcMultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEvent
EXPECT_EQ(3u, semaphores.size());
auto semaphoreCmd0 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]));
EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(node1->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress());
auto semaphoreCmd1 = genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[1]));
EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress());
EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword());
EXPECT_EQ(reinterpret_cast<uint64_t>(node3->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress());
}
alignedFree(svmPtr);
}

View File

@ -221,6 +221,7 @@ class CommandStreamReceiver {
TagAllocatorBase *getEventTsAllocator();
TagAllocatorBase *getEventPerfCountAllocator(const uint32_t tagSize);
virtual TagAllocatorBase *getTimestampPacketAllocator() = 0;
// Creates a tag allocator for timestamp packets for the given set of root device indices.
// The allocator is returned through a unique_ptr, so the caller owns the result.
virtual std::unique_ptr<TagAllocatorBase> createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) = 0;
virtual bool expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation);

View File

@ -130,6 +130,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
GraphicsAllocation *getClearColorAllocation() override;
TagAllocatorBase *getTimestampPacketAllocator() override;
std::unique_ptr<TagAllocatorBase> createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) override;
void postInitFlagsSetup() override;
void programActivePartitionConfig(LinearStream &csr);

View File

@ -406,7 +406,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
auto commandStreamStartCSR = commandStreamCSR.getUsed();
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies);
TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies);
TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies);
programActivePartitionConfigFlushTask(commandStreamCSR);
programEngineModeCommands(commandStreamCSR, dispatchFlags);
@ -980,7 +980,7 @@ size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdStreamSize(const Dispat
}
size += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(dispatchFlags.csrDependencies);
size += TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<GfxFamily>(dispatchFlags.csrDependencies);
size += TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer<GfxFamily>(dispatchFlags.csrDependencies);
size += EncodeKernelArgsBuffer<GfxFamily>::getKernelArgsBufferCmdsSize(kernelArgsBufferAllocation, logicalStateHelper.get());
@ -1196,7 +1196,7 @@ TaskCountType CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropert
for (auto &blitProperties : blitPropertiesContainer) {
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, blitProperties.csrDependencies);
TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer<GfxFamily>(commandStream, blitProperties.csrDependencies);
TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer<GfxFamily>(commandStream, blitProperties.csrDependencies);
BlitCommandsHelper<GfxFamily>::encodeWa(commandStream, blitProperties, latestSentBcsWaValue);
@ -1229,6 +1229,12 @@ TaskCountType CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropert
if (blitProperties.clearColorAllocation) {
makeResident(*blitProperties.clearColorAllocation);
}
if (blitProperties.multiRootDeviceEventSync != nullptr) {
MiFlushArgs args;
args.commandWithPostSync = true;
args.notifyEnable = isUsedNotifyEnableForPostSync();
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, blitProperties.multiRootDeviceEventSync->getGpuAddress() + blitProperties.multiRootDeviceEventSync->getContextEndOffset(), std::numeric_limits<uint64_t>::max(), args, hwInfo);
}
}
BlitCommandsHelper<GfxFamily>::programGlobalSequencerFlush(commandStream);
@ -1245,7 +1251,6 @@ TaskCountType CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropert
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), false, peekHwInfo());
}
if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnBlitCopy.get(), taskCount, PauseOnGpuProperties::PauseMode::AfterWorkload)) {
BlitCommandsHelper<GfxFamily>::dispatchDebugPauseCommands(commandStream, getDebugPauseStateGPUAddress(),
DebugPauseState::waitingForUserEndConfirmation,
@ -1522,6 +1527,11 @@ TagAllocatorBase *CommandStreamReceiverHw<GfxFamily>::getTimestampPacketAllocato
return timestampPacketAllocator.get();
}
// Builds a timestamp packet tag allocator for the root devices listed in rootDeviceIndices.
// Creation is delegated to the GfxCoreHelper, which receives this CSR's memory manager,
// preferred tag pool size, CSR type and the device bitfield of its OS context.
// The allocator is handed back through a unique_ptr, i.e. the caller owns it.
template <typename GfxFamily>
std::unique_ptr<TagAllocatorBase> CommandStreamReceiverHw<GfxFamily>::createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) {
    return getGfxCoreHelper().createTimestampPacketAllocator(rootDeviceIndices,
                                                             getMemoryManager(),
                                                             getPreferredTagPoolSize(),
                                                             getType(),
                                                             osContext->getDeviceBitfield());
}
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::postInitFlagsSetup() {
useNewResourceImplicitFlush = checkPlatformSupportsNewResourceImplicitFlush();

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -22,7 +22,7 @@ class CsrDependencies {
All
};
StackVec<std::pair<TaskCountType, uint64_t>, 32> taskCountContainer;
StackVec<TimestampPacketContainer *, 32> multiRootTimeStampSyncContainer;
StackVec<TimestampPacketContainer *, 32> timestampPacketContainer;
void makeResident(CommandStreamReceiver &commandStreamReceiver) const;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2022 Intel Corporation
* Copyright (C) 2019-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -50,6 +50,7 @@ BlitProperties BlitProperties::constructPropertiesForReadWrite(BlitterConstants:
BlitterConstants::BlitDirection::HostPtrToImage == blitDirection) {
return {
nullptr, // outputTimestampPacket
nullptr, // multiRootDeviceEventSync
blitDirection, // blitDirection
{}, // csrDependencies
AuxTranslationDirection::None, // auxTranslationDirection
@ -73,6 +74,7 @@ BlitProperties BlitProperties::constructPropertiesForReadWrite(BlitterConstants:
} else {
return {
nullptr, // outputTimestampPacket
nullptr, // multiRootDeviceEventSync
blitDirection, // blitDirection
{}, // csrDependencies
AuxTranslationDirection::None, // auxTranslationDirection
@ -104,6 +106,7 @@ BlitProperties BlitProperties::constructPropertiesForCopy(GraphicsAllocation *ds
return {
nullptr, // outputTimestampPacket
nullptr, // multiRootDeviceEventSync
BlitterConstants::BlitDirection::BufferToBuffer, // blitDirection
{}, // csrDependencies
AuxTranslationDirection::None, // auxTranslationDirection
@ -128,6 +131,7 @@ BlitProperties BlitProperties::constructPropertiesForAuxTranslation(AuxTranslati
auto allocationSize = allocation->getUnderlyingBufferSize();
return {
nullptr, // outputTimestampPacket
nullptr, // multiRootDeviceEventSync
BlitterConstants::BlitDirection::BufferToBuffer, // blitDirection
{}, // csrDependencies
auxTranslationDirection, // auxTranslationDirection

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2022 Intel Corporation
* Copyright (C) 2019-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -61,6 +61,7 @@ struct BlitProperties {
CommandStreamReceiver &gpguCsr, CommandStreamReceiver &bcsCsr);
TagNodeBase *outputTimestampPacket = nullptr;
TagNodeBase *multiRootDeviceEventSync = nullptr;
BlitterConstants::BlitDirection blitDirection = BlitterConstants::BlitDirection::BufferToHostPtr;
CsrDependencies csrDependencies;
AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None;

View File

@ -127,7 +127,7 @@ size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandSize(const Vec3<size_t>
sizePerBlit += estimatePostBlitCommandSize();
return TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDependencies) +
TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer<GfxFamily>(csrDependencies) +
TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer<GfxFamily>(csrDependencies) +
(sizePerBlit * nBlits) +
timestampCmdSize +
estimatePreBlitCommandSize();
@ -143,6 +143,9 @@ size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(const BlitPropert
auto isImage = blitProperties.isImageOperation();
size += BlitCommandsHelper<GfxFamily>::estimateBlitCommandSize(blitProperties.copySize, blitProperties.csrDependencies, updateTimestampPacket,
profilingEnabled, isImage, rootDeviceEnvironment, blitProperties.isSystemMemoryPoolUsed);
if (blitProperties.multiRootDeviceEventSync != nullptr) {
size += EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite();
}
}
size += BlitCommandsHelper<GfxFamily>::getWaCmdsSize(blitPropertiesContainer);
size += 2 * MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(*rootDeviceEnvironment.getHardwareInfo());

View File

@ -144,17 +144,11 @@ struct TimestampPacketHelper {
}
template <typename GfxFamily>
static void programCsrDependenciesForForTaskCountContainer(LinearStream &cmdStream, const CsrDependencies &csrDependencies) {
auto &taskCountContainer = csrDependencies.taskCountContainer;
for (auto &[taskCountPreviousRootDevice, tagAddressPreviousRootDevice] : taskCountContainer) {
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(cmdStream,
static_cast<uint64_t>(tagAddressPreviousRootDevice),
static_cast<uint32_t>(taskCountPreviousRootDevice),
COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
// Walks every container stored in csrDependencies.multiRootTimeStampSyncContainer and
// calls programSemaphore on cmdStream for each node the container exposes via peekNodes().
// Nothing is written to cmdStream when there are no containers or they hold no nodes.
static void programCsrDependenciesForForMultiRootDeviceSyncContainer(LinearStream &cmdStream, const CsrDependencies &csrDependencies) {
    const auto &syncContainers = csrDependencies.multiRootTimeStampSyncContainer;
    for (auto *syncContainer : syncContainers) {
        for (auto &syncNode : syncContainer->peekNodes()) {
            TimestampPacketHelper::programSemaphore<GfxFamily>(cmdStream, *syncNode);
        }
    }
}
@ -217,8 +211,8 @@ struct TimestampPacketHelper {
}
template <typename GfxFamily>
static size_t getRequiredCmdStreamSizeForTaskCountContainer(const CsrDependencies &csrDependencies) {
return csrDependencies.taskCountContainer.size() * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
// Returns the command stream space reserved for multi-root-device synchronization:
// one MI_SEMAPHORE_WAIT per entry of csrDependencies.multiRootTimeStampSyncContainer.
// NOTE(review): programCsrDependenciesForForMultiRootDeviceSyncContainer programs semaphores
// per node of each container, while this estimate counts containers only. The two agree
// when every container holds exactly one single-semaphore node - confirm that invariant.
static size_t getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer(const CsrDependencies &csrDependencies) {
    using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
    const auto containersCount = csrDependencies.multiRootTimeStampSyncContainer.size();
    return containersCount * sizeof(MI_SEMAPHORE_WAIT);
}
};

View File

@ -18,6 +18,7 @@
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/surface.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/utilities/tag_allocator.h"
#include "shared/test/common/helpers/dispatch_flags_helper.h"
#include <optional>
@ -94,6 +95,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
};
// The mock exposes no timestamp packet allocator; it always returns nullptr.
TagAllocatorBase *getTimestampPacketAllocator() override { return nullptr; }
// The mock builds no multi-root-device allocator; callers always receive an empty unique_ptr.
std::unique_ptr<TagAllocatorBase> createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) override { return nullptr; }
CompletionStamp flushTask(
LinearStream &commandStream,

View File

@ -15,10 +15,13 @@
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/source/memory_manager/surface.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/utilities/tag_allocator.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/fixtures/command_stream_receiver_fixture.inl"
#include "shared/test/common/fixtures/device_fixture.h"
#include "shared/test/common/helpers/batch_buffer_helper.h"
@ -32,6 +35,7 @@
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/mocks/mock_internal_allocation_storage.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/mocks/mock_timestamp_container.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/test_macros/test_checks_shared.h"
@ -2463,3 +2467,89 @@ HWTEST_F(CommandStreamReceiverHwTest, givenVariousCsrModeWhenGettingTbxModeThenE
ultCsr.commandStreamReceiverType = CommandStreamReceiverType::CSR_TBX_WITH_AUB;
EXPECT_TRUE(ultCsr.isTbxMode());
}
// With an execution environment of two root devices, the allocator created for indices {0, 1}
// must report 1 as its highest root device index.
HWTEST_F(CommandStreamReceiverHwTest, GivenTwoRootDevicesWhengetMultiRootDeviceTimestampPacketAllocatorCalledThenAllocatorForTwoDevicesCreated) {
    // Re-exports maxRootDeviceIndex publicly so the test can read it.
    class MockTagAllocatorBase : public TagAllocatorBase {
      public:
        using TagAllocatorBase::maxRootDeviceIndex;
    };

    auto mockExecutionEnvironment = std::make_unique<MockExecutionEnvironment>(defaultHwInfo.get(), true, 2u);
    // The environment is released from the unique_ptr before being passed on by reference.
    auto rootDevices = DeviceFactory::createDevices(*mockExecutionEnvironment.release());
    const RootDeviceIndicesContainer rootDeviceIndices = {0u, 1u};

    auto commandStreamReceiver = rootDevices[0]->getDefaultEngine().commandStreamReceiver;
    auto multiRootAllocator = commandStreamReceiver->createMultiRootDeviceTimestampPacketAllocator(rootDeviceIndices);

    auto *exposedAllocator = reinterpret_cast<MockTagAllocatorBase *>(multiRootAllocator.get());
    EXPECT_EQ(exposedAllocator->maxRootDeviceIndex, 1u);
}
// With an execution environment of four root devices, the allocator created for indices
// {0, 1, 2, 3} must report 3 as its highest root device index.
// NOTE(review): the test name says "Five" root devices, but the body sets up four - confirm
// which one is intended and align the name or the setup.
HWTEST_F(CommandStreamReceiverHwTest, GivenFiveRootDevicesWhengetMultiRootDeviceTimestampPacketAllocatorCalledThenAllocatorForFiveDevicesCreated) {
    // Re-exports maxRootDeviceIndex publicly so the test can read it.
    class MockTagAllocatorBase : public TagAllocatorBase {
      public:
        using TagAllocatorBase::maxRootDeviceIndex;
    };

    auto mockExecutionEnvironment = std::make_unique<MockExecutionEnvironment>(defaultHwInfo.get(), true, 4u);
    // The environment is released from the unique_ptr before being passed on by reference.
    auto rootDevices = DeviceFactory::createDevices(*mockExecutionEnvironment.release());
    const RootDeviceIndicesContainer rootDeviceIndices = {0u, 1u, 2u, 3u};

    auto commandStreamReceiver = rootDevices[0]->getDefaultEngine().commandStreamReceiver;
    auto multiRootAllocator = commandStreamReceiver->createMultiRootDeviceTimestampPacketAllocator(rootDeviceIndices);

    auto *exposedAllocator = reinterpret_cast<MockTagAllocatorBase *>(multiRootAllocator.get());
    EXPECT_EQ(exposedAllocator->maxRootDeviceIndex, 3u);
}
// When a blit carries a multi-root-device sync node, flushBcsTask must emit an MI_FLUSH_DW
// whose destination address is the node's GPU address plus its context-end offset.
HWTEST_F(CommandStreamReceiverHwTest, givenMultiRootDeviceSyncNodeWhenFlushBcsTAskThenMiFlushAdded) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    auto tagAllocator = std::make_unique<MockTagAllocator<>>(pDevice->getRootDeviceIndex(), pDevice->getExecutionEnvironment()->memoryManager.get(), 10u);

    auto properties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr,
                                                                      csr, csr.getTagAllocation(), nullptr,
                                                                      csr.getTagAllocation()->getUnderlyingBuffer(),
                                                                      csr.getTagAllocation()->getGpuAddress(), 0,
                                                                      0, 0, 0, 0, 0, 0, 0);
    auto syncNode = tagAllocator->getTag();
    properties.multiRootDeviceEventSync = syncNode;

    BlitPropertiesContainer blits;
    blits.push_back(properties);
    csr.flushBcsTask(blits, true, false, *pDevice);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(csr.commandStream, 0);

    // Scan every MI_FLUSH_DW in the parsed stream for one that targets the sync node.
    const auto expectedAddress = syncNode->getGpuAddress() + syncNode->getContextEndOffset();
    bool nodeAddressFound = false;
    for (auto flushIt = find<MI_FLUSH_DW *>(parser.cmdList.begin(), parser.cmdList.end());
         flushIt != parser.cmdList.end();
         flushIt = find<MI_FLUSH_DW *>(++flushIt, parser.cmdList.end())) {
        auto flushCmd = genCmdCast<MI_FLUSH_DW *>(*flushIt);
        if (flushCmd->getDestinationAddress() == expectedAddress) {
            nodeAddressFound = true;
            break;
        }
    }
    EXPECT_TRUE(nodeAddressFound);
}
// When the blit properties keep multiRootDeviceEventSync unset, flushBcsTask must not emit
// any MI_FLUSH_DW that targets the (unattached) tag node's context-end address.
HWTEST_F(CommandStreamReceiverHwTest, givenNullPtrAsMultiRootDeviceSyncNodeWhenFlushBcsTAskThenMiFlushNotAdded) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    auto tagAllocator = std::make_unique<MockTagAllocator<>>(pDevice->getRootDeviceIndex(), pDevice->getExecutionEnvironment()->memoryManager.get(), 10u);

    auto properties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr,
                                                                      csr, csr.getTagAllocation(), nullptr,
                                                                      csr.getTagAllocation()->getUnderlyingBuffer(),
                                                                      csr.getTagAllocation()->getGpuAddress(), 0,
                                                                      0, 0, 0, 0, 0, 0, 0);
    // The node is only used as a reference address; it is deliberately not attached to the blit.
    auto unattachedNode = tagAllocator->getTag();

    BlitPropertiesContainer blits;
    blits.push_back(properties);
    csr.flushBcsTask(blits, true, false, *pDevice);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(csr.commandStream, 0);

    // Scan every MI_FLUSH_DW in the parsed stream; none may target the unattached node.
    const auto unexpectedAddress = unattachedNode->getGpuAddress() + unattachedNode->getContextEndOffset();
    bool nodeAddressFound = false;
    for (auto flushIt = find<MI_FLUSH_DW *>(parser.cmdList.begin(), parser.cmdList.end());
         flushIt != parser.cmdList.end();
         flushIt = find<MI_FLUSH_DW *>(++flushIt, parser.cmdList.end())) {
        auto flushCmd = genCmdCast<MI_FLUSH_DW *>(*flushIt);
        if (flushCmd->getDestinationAddress() == unexpectedAddress) {
            nodeAddressFound = true;
            break;
        }
    }
    EXPECT_FALSE(nodeAddressFound);
}

View File

@ -15,6 +15,7 @@
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/mock_timestamp_container.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/test_checks_shared.h"
@ -663,3 +664,25 @@ HWTEST2_F(BlitTests, givenPlatformWhenCallingDispatchPreBlitCommandThenNoneMiFlu
auto cmdIterator = find<typename FamilyType::MI_FLUSH_DW *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_EQ(hwParser.cmdList.end(), cmdIterator);
}
// Compares estimateBlitCommandsSize for three blits without a multi-root-device sync node
// against three blits that carry one; the two estimates are expected to differ.
// NOTE(review): the test name talks about dispatching pre-blit commands and MI_FLUSH_DW not
// being programmed, which does not describe this body - looks like a copy-pasted name; confirm.
HWTEST_F(BlitTests, givenPlatformWhenCallingDispatchPreBlitCommandThenNoneMiFlushDwIsProgramed) {
    constexpr size_t blitsPerContainer = 3u;

    auto tagAllocator = std::make_unique<MockTagAllocator<>>(pDevice->getRootDeviceIndex(), pDevice->getExecutionEnvironment()->memoryManager.get(), 10u);
    auto syncNode = tagAllocator->getTag();

    BlitProperties properties{};
    properties.copySize = {1, 1, 1};

    // Baseline: no sync node attached.
    BlitPropertiesContainer blitsWithoutNode;
    for (size_t i = 0u; i < blitsPerContainer; i++) {
        blitsWithoutNode.push_back(properties);
    }
    auto estimatedSizeWithoutNode = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(
        blitsWithoutNode, false, true, false, pDevice->getRootDeviceEnvironment());

    // Same blits, this time each one carries the sync node.
    properties.multiRootDeviceEventSync = syncNode;
    BlitPropertiesContainer blitsWithNode;
    for (size_t i = 0u; i < blitsPerContainer; i++) {
        blitsWithNode.push_back(properties);
    }
    auto estimatedSizeWithNode = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(
        blitsWithNode, false, true, false, pDevice->getRootDeviceEnvironment());

    EXPECT_NE(estimatedSizeWithoutNode, estimatedSizeWithNode);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 Intel Corporation
* Copyright (C) 2022-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -301,3 +301,35 @@ HWTEST_F(DeviceTimestampPacketTests, givenDebugFlagSetWhenCreatingTimestampPacke
EXPECT_FALSE(tag->canBeReleased());
}
using TimestampPacketHelperTests = Test<DeviceFixture>;
// A single tag node registered in the multi-root sync container must result in exactly
// sizeof(MI_SEMAPHORE_WAIT) bytes being consumed from the command stream.
HWTEST_F(TimestampPacketHelperTests, givenTagNodesInMultiRootSyncContainerWhenProgramingDependensiecThenSemaforesAreProgrammed) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    StackVec<char, 4096> storage(4096);
    LinearStream stream(storage.begin(), storage.size());

    auto tagAllocator = std::make_unique<MockTagAllocator<>>(0, pDevice->getMemoryManager());
    TimestampPacketContainer syncContainer{};
    syncContainer.add(tagAllocator->getTag());

    CsrDependencies dependencies;
    dependencies.multiRootTimeStampSyncContainer.push_back(&syncContainer);

    TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer<FamilyType>(stream, dependencies);
    EXPECT_EQ(stream.getUsed(), sizeof(MI_SEMAPHORE_WAIT));
}
// A registered container that holds no nodes must leave the command stream untouched.
HWTEST_F(TimestampPacketHelperTests, givenEmptyContainerMultiRootSyncContainerWhenProgramingDependensiecThenZeroSemaforesAreProgrammed) {
    StackVec<char, 4096> storage(4096);
    LinearStream stream(storage.begin(), storage.size());

    TimestampPacketContainer emptyContainer{};
    CsrDependencies dependencies;
    dependencies.multiRootTimeStampSyncContainer.push_back(&emptyContainer);

    TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer<FamilyType>(stream, dependencies);
    EXPECT_EQ(stream.getUsed(), 0u);
}
// With no containers registered at all, programming dependencies must consume no stream space.
HWTEST_F(TimestampPacketHelperTests, givenEmptyMultiRootSyncContainerWhenProgramingDependensiecThenZeroSemaforesAreProgrammed) {
    StackVec<char, 4096> storage(4096);
    LinearStream stream(storage.begin(), storage.size());

    CsrDependencies dependenciesWithoutContainers;

    TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer<FamilyType>(stream, dependenciesWithoutContainers);
    EXPECT_EQ(stream.getUsed(), 0u);
}