diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h index 8fbadd5f80..5679c2f616 100644 --- a/opencl/source/command_queue/command_queue_hw.h +++ b/opencl/source/command_queue/command_queue_hw.h @@ -391,8 +391,7 @@ class CommandQueueHw : public CommandQueue { EventsRequest &eventsRequest, EventBuilder &externalEventBuilder, std::unique_ptr &&printfHandler, - CommandStreamReceiver *bcsCsr, - TagNodeBase *multiRootDeviceSyncNode); + CommandStreamReceiver *bcsCsr); CompletionStamp enqueueCommandWithoutKernel(Surface **surfaces, size_t surfaceCount, @@ -423,7 +422,7 @@ class CommandQueueHw : public CommandQueue { TimestampPacketDependencies &timestampPacketDependencies, const EventsRequest &eventsRequest, LinearStream *commandStream, - uint32_t commandType, bool queueBlocked, TagNodeBase *multiRootDeviceEventSync); + uint32_t commandType, bool queueBlocked); void submitCacheFlush(Surface **surfaces, size_t numSurfaces, LinearStream *commandStream, @@ -434,8 +433,6 @@ class CommandQueueHw : public CommandQueue { bool waitForTimestamps(Range copyEnginesToWait, TaskCountType taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override; MOCKABLE_VIRTUAL bool isCacheFlushForBcsRequired() const; - void processSignalMultiRootDeviceNode(LinearStream *commandStream, - TagNodeBase *node); protected: MOCKABLE_VIRTUAL void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo){}; @@ -476,7 +473,7 @@ class CommandQueueHw : public CommandQueue { blockedCommandsData = std::make_unique(commandStream, *gpgpuCsr.getInternalAllocationStorage()); } else { commandStream = &getCommandStream(*this, csrDependencies, profilingRequired, perfCountersRequired, - blitEnqueue, multiDispatchInfo, surfaces, numSurfaces, isMarkerWithProfiling, eventsRequest.numEventsInWaitList > 0, eventsRequest.outEvent); + blitEnqueue, multiDispatchInfo, surfaces, 
numSurfaces, isMarkerWithProfiling, eventsRequest.numEventsInWaitList > 0); } return commandStream; } diff --git a/opencl/source/command_queue/command_queue_hw_xehp_and_later.inl b/opencl/source/command_queue/command_queue_hw_xehp_and_later.inl index 27b00dd488..f2f9bb590a 100644 --- a/opencl/source/command_queue/command_queue_hw_xehp_and_later.inl +++ b/opencl/source/command_queue/command_queue_hw_xehp_and_later.inl @@ -45,7 +45,7 @@ bool CommandQueueHw::isCacheFlushCommand(uint32_t commandType) const { } template <> -LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo, Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList, cl_event *outEvent) { +LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo, Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList) { size_t expectedSizeCS = 0; [[maybe_unused]] bool usePostSync = false; if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 55b2c1c440..126abd7aaa 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -177,7 +177,7 @@ cl_int CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, BlitPropertiesContainer blitPropertiesContainer; if (this->context->getRootDeviceIndices().size() > 1) { - eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, computeCommandStreamReceiver); + eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, computeCommandStreamReceiver); } const bool enqueueWithBlitAuxTranslation = 
isBlitAuxTranslationRequired(multiDispatchInfo); @@ -226,7 +226,7 @@ cl_int CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, } if (this->context->getRootDeviceIndices().size() > 1) { - TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer(commandStream, csrDeps); + TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer(commandStream, csrDeps); } if (enqueueWithBlitAuxTranslation) { @@ -280,17 +280,6 @@ cl_int CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, } else if (isMarkerWithPostSyncWrite) { processDispatchForMarker(*this, &commandStream, eventsRequest, csrDeps); } - TagNodeBase *multiRootEventSyncStamp = nullptr; - if (eventBuilder.getEvent() && eventBuilder.getEvent()->getContext()->getRootDeviceIndices().size() > 1) { - multiRootEventSyncStamp = eventBuilder.getEvent()->getMultiRootTimestampSyncNode(); - if (!blockQueue) { - this->getGpgpuCommandStreamReceiver().makeResident(*multiRootEventSyncStamp->getBaseGraphicsAllocation()); - } - processSignalMultiRootDeviceNode(&commandStream, multiRootEventSyncStamp); - if (CL_COMMAND_MARKER == commandType) { - flushDependenciesForNonKernelCommand = true; - } - } CompletionStamp completionStamp = {CompletionStamp::notReady, taskLevel, 0}; const EnqueueProperties enqueueProperties(false, !multiDispatchInfo.empty(), isCacheFlushCommand(commandType), @@ -393,8 +382,7 @@ cl_int CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, eventsRequest, eventBuilder, std::move(printfHandler), - nullptr, - multiRootEventSyncStamp); + nullptr); } if (deferredTimestampPackets.get()) { @@ -509,7 +497,7 @@ BlitProperties CommandQueueHw::processDispatchForBlitEnqueue(CommandS const MultiDispatchInfo &multiDispatchInfo, TimestampPacketDependencies &timestampPacketDependencies, const EventsRequest &eventsRequest, LinearStream *commandStream, - uint32_t commandType, bool queueBlocked, TagNodeBase *multiRootDeviceEventSync) { + uint32_t commandType, bool 
queueBlocked) { auto blitDirection = ClBlitProperties::obtainBlitDirection(commandType); auto blitProperties = ClBlitProperties::constructProperties(blitDirection, blitCommandStreamReceiver, @@ -522,7 +510,7 @@ BlitProperties CommandQueueHw::processDispatchForBlitEnqueue(CommandS blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes); blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.barrierNodes); } - blitProperties.multiRootDeviceEventSync = multiRootDeviceEventSync; + auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0); blitProperties.outputTimestampPacket = currentTimestampPacketNode; @@ -628,20 +616,7 @@ void CommandQueueHw::processDispatchForMarker(CommandQueue &commandQu HardwareInterface::dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue); getGpgpuCommandStreamReceiver().makeResident(*hwTimeStamps->getBaseGraphicsAllocation()); } -template -void CommandQueueHw::processSignalMultiRootDeviceNode(LinearStream *commandStream, - TagNodeBase *node) { - const auto &hwInfo = getDevice().getHardwareInfo(); - PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); - MemorySynchronizationCommands::addBarrierWithPostSyncOperation( - *commandStream, - PostSyncMode::ImmediateData, - node->getGpuAddress() + node->getContextEndOffset(), - std::numeric_limits::max(), - hwInfo, - args); -} + template void CommandQueueHw::processDispatchForMarkerWithTimestampPacket(CommandQueue &commandQueue, LinearStream *commandStream, @@ -927,8 +902,7 @@ void CommandQueueHw::enqueueBlocked( EventsRequest &eventsRequest, EventBuilder &externalEventBuilder, std::unique_ptr &&printfHandler, - CommandStreamReceiver *bcsCsr, - TagNodeBase *multiRootDeviceSyncNode) { + CommandStreamReceiver *bcsCsr) { TakeOwnershipWrapper> queueOwnership(*this); @@ -999,8 +973,7 @@ void 
CommandQueueHw::enqueueBlocked( std::move(printfHandler), preemptionMode, multiDispatchInfo.peekMainKernel(), - (uint32_t)multiDispatchInfo.size(), - multiRootDeviceSyncNode); + (uint32_t)multiDispatchInfo.size()); } if (storeTimestampPackets) { command->setTimestampPacketNode(*timestampPacketContainer, std::move(timestampPacketDependencies)); @@ -1307,14 +1280,10 @@ cl_int CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDisp } TimestampPacketDependencies timestampPacketDependencies; - TagNodeBase *multiRootEventSyncStamp = nullptr; BlitPropertiesContainer blitPropertiesContainer; CsrDependencies csrDeps; eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, bcsCsr, CsrDependencies::DependenciesType::All); - if (this->context->getRootDeviceIndices().size() > 1) { - eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, bcsCsr); - } auto allocator = bcsCsr.getTimestampPacketAllocator(); if (!blockQueue) { @@ -1341,10 +1310,6 @@ cl_int CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDisp if (eventBuilder.getEvent()) { eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer); } - if (eventBuilder.getEvent() && eventBuilder.getEvent()->getContext()->getRootDeviceIndices().size() > 1) { - multiRootEventSyncStamp = eventBuilder.getEvent()->getMultiRootTimestampSyncNode(); - bcsCsr.makeResident(*multiRootEventSyncStamp->getBaseGraphicsAllocation()); - } CompletionStamp completionStamp = {CompletionStamp::notReady, taskLevel, 0}; @@ -1361,7 +1326,7 @@ cl_int CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDisp } blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies, - eventsRequest, gpgpuCommandStream, cmdType, blockQueue, multiRootEventSyncStamp)); + eventsRequest, gpgpuCommandStream, cmdType, blockQueue)); if (!blockQueue) { completionStamp = enqueueCommandWithoutKernel(nullptr, 0, gpgpuCommandStream, gpgpuCommandStreamStart, blocking, @@ 
-1388,7 +1353,7 @@ cl_int CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDisp updateFromCompletionStamp(completionStamp, eventBuilder.getEvent()); if (blockQueue) { - enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, nullptr, &bcsCsr, multiRootEventSyncStamp); + enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, nullptr, &bcsCsr); if (gpgpuSubmission) { if (DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() == 1) { diff --git a/opencl/source/command_queue/gpgpu_walker.h b/opencl/source/command_queue/gpgpu_walker.h index a7ae002f44..e22858c851 100644 --- a/opencl/source/command_queue/gpgpu_walker.h +++ b/opencl/source/command_queue/gpgpu_walker.h @@ -88,7 +88,7 @@ class GpgpuWalkerHelper { template struct EnqueueOperation { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - static size_t getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitList, cl_event *outEvent); + static size_t getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitList); static size_t getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo); static size_t getSizeRequiredForTimestampPacketWrite(); static size_t getSizeForCacheFlushAfterWalkerCommands(const Kernel &kernel, const CommandQueue &commandQueue); @@ -101,8 
+101,8 @@ struct EnqueueOperation { template LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo, - Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList, cl_event *outEvent) { - size_t expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(eventType, csrDeps, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, blitEnqueue, commandQueue, multiDispatchInfo, isMarkerWithProfiling, eventsInWaitList, outEvent); + Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList) { + size_t expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(eventType, csrDeps, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, blitEnqueue, commandQueue, multiDispatchInfo, isMarkerWithProfiling, eventsInWaitList); return commandQueue.getCS(expectedSizeCS); } diff --git a/opencl/source/command_queue/gpgpu_walker_base.inl b/opencl/source/command_queue/gpgpu_walker_base.inl index 1bfc233caf..f057e5c324 100644 --- a/opencl/source/command_queue/gpgpu_walker_base.inl +++ b/opencl/source/command_queue/gpgpu_walker_base.inl @@ -165,7 +165,7 @@ size_t GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(cons } template -size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitlist, cl_event *outEvent) { +size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitlist) { size_t 
expectedSizeCS = 0; auto &hwInfo = commandQueue.getDevice().getHardwareInfo(); auto &gfxCoreHelper = commandQueue.getDevice().getGfxCoreHelper(); @@ -218,14 +218,8 @@ size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, c if (DebugManager.flags.GpuScratchRegWriteAfterWalker.get() != -1) { expectedSizeCS += sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM); } - expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer(csrDeps); - if (outEvent) { - auto pEvent = castToObjectOrAbort(*outEvent); - if ((pEvent->getContext()->getRootDeviceIndices().size() > 1) && (!pEvent->isUserEvent())) { - expectedSizeCS += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo, false); - } - } - expectedSizeCS += MemorySynchronizationCommands::getSizeForSingleBarrier(false); + + expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(csrDeps); return expectedSizeCS; } diff --git a/opencl/source/command_queue/hardware_interface.h b/opencl/source/command_queue/hardware_interface.h index 021b02cb79..42d66d7aef 100644 --- a/opencl/source/command_queue/hardware_interface.h +++ b/opencl/source/command_queue/hardware_interface.h @@ -31,7 +31,6 @@ struct HardwareInterfaceWalkerArgs { size_t localWorkSizes[3] = {}; TagNodeBase *hwTimeStamps = nullptr; TagNodeBase *hwPerfCounter = nullptr; - TagNodeBase *multiRootDeviceEventStamp = nullptr; TimestampPacketDependencies *timestampPacketDependencies = nullptr; TimestampPacketContainer *currentTimestampPacketNodes = nullptr; const Vec3 *numberOfWorkgroups = nullptr; diff --git a/opencl/source/command_queue/hardware_interface_base.inl b/opencl/source/command_queue/hardware_interface_base.inl index f1f1d7cf63..0d6912b3d9 100644 --- a/opencl/source/command_queue/hardware_interface_base.inl +++ b/opencl/source/command_queue/hardware_interface_base.inl @@ -133,7 +133,6 @@ void HardwareInterface::dispatchWalker( 
walkerArgs.currentTimestampPacketNodes); walkerArgs.currentDispatchIndex = 0; - for (auto &dispatchInfo : multiDispatchInfo) { dispatchInfo.dispatchInitCommands(*commandStream, walkerArgs.timestampPacketDependencies, commandQueue.getDevice().getHardwareInfo()); walkerArgs.isMainKernel = (dispatchInfo.getKernel() == mainKernel); diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp index 8e71882bb3..a725a72e65 100644 --- a/opencl/source/context/context.cpp +++ b/opencl/source/context/context.cpp @@ -49,9 +49,7 @@ Context::Context( Context::~Context() { gtpinNotifyContextDestroy((cl_context)this); - if (multiRootDeviceTimestampPacketAllocator.get() != nullptr) { - multiRootDeviceTimestampPacketAllocator.reset(); - } + if (smallBufferPoolAllocator.isAggregatedSmallBuffersEnabled(this)) { smallBufferPoolAllocator.releaseSmallBufferPool(); } @@ -566,15 +564,5 @@ void Context::BufferPoolAllocator::releaseSmallBufferPool() { delete this->mainStorage; this->mainStorage = nullptr; } -TagAllocatorBase *Context::getMultiRootDeviceTimestampPacketAllocator() { - return multiRootDeviceTimestampPacketAllocator.get(); -} -void Context::setMultiRootDeviceTimestampPacketAllocator(std::unique_ptr &allocator) { - multiRootDeviceTimestampPacketAllocator = std::move(allocator); -} - -std::unique_lock Context::obtainOwnershipForMultiRootDeviceAllocator() { - return std::unique_lock(multiRootDeviceAllocatorMtx); -} } // namespace NEO diff --git a/opencl/source/context/context.h b/opencl/source/context/context.h index 45d359ecb5..f06597683a 100644 --- a/opencl/source/context/context.h +++ b/opencl/source/context/context.h @@ -37,7 +37,6 @@ class SharingFunctions; class SVMAllocsManager; class Program; class Platform; -class TagAllocatorBase; template <> struct OpenCLObjectMapper<_cl_context> { @@ -224,9 +223,6 @@ class Context : public BaseObject<_cl_context> { BufferPoolAllocator &getBufferPoolAllocator() { return this->smallBufferPoolAllocator; } - 
TagAllocatorBase *getMultiRootDeviceTimestampPacketAllocator(); - std::unique_lock obtainOwnershipForMultiRootDeviceAllocator(); - void setMultiRootDeviceTimestampPacketAllocator(std::unique_ptr &allocator); protected: struct BuiltInKernel { @@ -267,8 +263,6 @@ class Context : public BaseObject<_cl_context> { uint32_t maxRootDeviceIndex = std::numeric_limits::max(); cl_bool preferD3dSharedResources = 0u; ContextType contextType = ContextType::CONTEXT_TYPE_DEFAULT; - std::unique_ptr multiRootDeviceTimestampPacketAllocator; - std::mutex multiRootDeviceAllocatorMtx; bool interopUserSync = false; bool resolvesRequiredInKernels = false; diff --git a/opencl/source/event/event.cpp b/opencl/source/event/event.cpp index 27953f1cc8..e1752f9774 100644 --- a/opencl/source/event/event.cpp +++ b/opencl/source/event/event.cpp @@ -131,9 +131,6 @@ Event::~Event() { if (timeStampNode != nullptr) { timeStampNode->returnTag(); } - if (multiRootTimeStampSyncNode != nullptr) { - multiRootTimeStampSyncNode->returnTag(); - } if (perfCounterNode != nullptr) { cmdQueue->getPerfCounters()->deleteQuery(perfCounterNode->getQueryHandleRef()); perfCounterNode->getQueryHandleRef() = {}; @@ -886,6 +883,7 @@ TagNodeBase *Event::getHwTimeStampNode() { } TagNodeBase *Event::getHwPerfCounterNode() { + if (!perfCounterNode && cmdQueue->getPerfCounters()) { const uint32_t gpuReportSize = HwPerfCounter::getSize(*(cmdQueue->getPerfCounters())); perfCounterNode = cmdQueue->getGpgpuCommandStreamReceiver().getEventPerfCountAllocator(gpuReportSize)->getTag(); @@ -893,27 +891,11 @@ TagNodeBase *Event::getHwPerfCounterNode() { return perfCounterNode; } -TagNodeBase *Event::getMultiRootTimestampSyncNode() { - auto lock = getContext()->obtainOwnershipForMultiRootDeviceAllocator(); - if (getContext()->getMultiRootDeviceTimestampPacketAllocator() == nullptr) { - auto allocator = cmdQueue->getGpgpuCommandStreamReceiver().createMultiRootDeviceTimestampPacketAllocator(getContext()->getRootDeviceIndices()); - 
getContext()->setMultiRootDeviceTimestampPacketAllocator(allocator); - } - lock.unlock(); - if (multiRootDeviceTimestampPacketContainer.get() == nullptr) { - multiRootDeviceTimestampPacketContainer = std::make_unique(); - } - multiRootTimeStampSyncNode = getContext()->getMultiRootDeviceTimestampPacketAllocator()->getTag(); - multiRootDeviceTimestampPacketContainer->add(multiRootTimeStampSyncNode); - return multiRootTimeStampSyncNode; -} - void Event::addTimestampPacketNodes(const TimestampPacketContainer &inputTimestampPacketContainer) { timestampPacketContainer->assignAndIncrementNodesRefCounts(inputTimestampPacketContainer); } TimestampPacketContainer *Event::getTimestampPacketNodes() const { return timestampPacketContainer.get(); } -TimestampPacketContainer *Event::getMultiRootDeviceTimestampPacketNodes() const { return multiRootDeviceTimestampPacketContainer.get(); } bool Event::checkUserEventDependencies(cl_uint numEventsInWaitList, const cl_event *eventWaitList) { bool userEventsDependencies = false; diff --git a/opencl/source/event/event.h b/opencl/source/event/event.h index dd7ed18471..a502fe415d 100644 --- a/opencl/source/event/event.h +++ b/opencl/source/event/event.h @@ -115,7 +115,6 @@ class Event : public BaseObject<_cl_event>, public IDNode { void addTimestampPacketNodes(const TimestampPacketContainer &inputTimestampPacketContainer); TimestampPacketContainer *getTimestampPacketNodes() const; - TimestampPacketContainer *getMultiRootDeviceTimestampPacketNodes() const; bool isPerfCountersEnabled() const { return perfCountersEnabled; @@ -130,7 +129,6 @@ class Event : public BaseObject<_cl_event>, public IDNode { } TagNodeBase *getHwPerfCounterNode(); - TagNodeBase *getMultiRootTimestampSyncNode(); std::unique_ptr flushStamp; std::atomic taskLevel; @@ -386,10 +384,8 @@ class Event : public BaseObject<_cl_event>, public IDNode { bool perfCountersEnabled; TagNodeBase *timeStampNode = nullptr; TagNodeBase *perfCounterNode = nullptr; - TagNodeBase 
*multiRootTimeStampSyncNode = nullptr; std::unique_ptr timestampPacketContainer; // number of events this event depends on - std::unique_ptr multiRootDeviceTimestampPacketContainer; std::atomic parentCount; // event parents std::vector parentEvents; diff --git a/opencl/source/helpers/properties_helper.cpp b/opencl/source/helpers/properties_helper.cpp index db1adee64d..52dcaa6b1a 100644 --- a/opencl/source/helpers/properties_helper.cpp +++ b/opencl/source/helpers/properties_helper.cpp @@ -26,6 +26,7 @@ namespace NEO { void flushDependentCsr(CommandStreamReceiver &dependentCsr, CsrDependencies &csrDeps) { auto csrOwnership = dependentCsr.obtainUniqueOwnership(); dependentCsr.updateTagFromWait(); + csrDeps.taskCountContainer.push_back({dependentCsr.peekTaskCount(), reinterpret_cast(dependentCsr.getTagAddress())}); } void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr, CsrDependencies::DependenciesType depsType) const { @@ -59,7 +60,6 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci if (productHelper.isDcFlushAllowed()) { if (!dependentCsr.isLatestTaskCountFlushed()) { flushDependentCsr(dependentCsr, csrDeps); - // csrDeps.taskCountContainer.push_back({dependentCsr.peekTaskCount(), reinterpret_cast(dependentCsr.getTagAddress())}); currentCsr.makeResident(*dependentCsr.getTagAllocation()); } } @@ -68,22 +68,23 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci } } -void EventsRequest::fillCsrDependenciesForRootDevices(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const { +void EventsRequest::fillCsrDependenciesForTaskCountContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const { for (cl_uint i = 0; i < this->numEventsInWaitList; i++) { auto event = castToObjectOrAbort(this->eventWaitList[i]); if (event->isUserEvent() || CompletionStamp::notReady == event->peekTaskCount()) { continue; } + if 
(event->getCommandQueue() && event->getCommandQueue()->getDevice().getRootDeviceIndex() != currentCsr.getRootDeviceIndex()) { - auto timestampPacketContainer = event->getMultiRootDeviceTimestampPacketNodes(); - if (!timestampPacketContainer || timestampPacketContainer->peekNodes().empty()) { - continue; - } auto &dependentCsr = event->getCommandQueue()->getGpgpuCommandStreamReceiver(); if (!dependentCsr.isLatestTaskCountFlushed()) { flushDependentCsr(dependentCsr, csrDeps); + } else { + csrDeps.taskCountContainer.push_back({event->peekTaskCount(), reinterpret_cast(dependentCsr.getTagAddress())}); } - csrDeps.multiRootTimeStampSyncContainer.push_back(timestampPacketContainer); + + auto graphicsAllocation = event->getCommandQueue()->getGpgpuCommandStreamReceiver().getTagsMultiAllocation()->getGraphicsAllocation(currentCsr.getRootDeviceIndex()); + currentCsr.getResidencyAllocations().push_back(graphicsAllocation); } } } diff --git a/opencl/source/helpers/properties_helper.h b/opencl/source/helpers/properties_helper.h index 5a653b18e5..37b2c6564b 100644 --- a/opencl/source/helpers/properties_helper.h +++ b/opencl/source/helpers/properties_helper.h @@ -25,7 +25,7 @@ struct EventsRequest { : numEventsInWaitList(numEventsInWaitList), eventWaitList(eventWaitList), outEvent(outEvent) {} void fillCsrDependenciesForTimestampPacketContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr, CsrDependencies::DependenciesType depsType) const; - void fillCsrDependenciesForRootDevices(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const; + void fillCsrDependenciesForTaskCountContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const; void setupBcsCsrForOutputEvent(CommandStreamReceiver &bcsCsr) const; cl_uint numEventsInWaitList; diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index 8e827e5eef..92609fdbc6 100644 --- a/opencl/source/helpers/task_information.cpp +++ 
b/opencl/source/helpers/task_information.cpp @@ -119,11 +119,10 @@ CompletionStamp &CommandMapUnmap::submit(TaskCountType taskLevel, bool terminate CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector surfaces, bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr &&printfHandler, - PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount, - TagNodeBase *multiRootDeviceSyncNode) + PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount) : Command(commandQueue, kernelOperation), surfaces(std::move(surfaces)), flushDC(flushDC), slmUsed(usesSLM), commandType(commandType), printfHandler(std::move(printfHandler)), kernel(kernel), - kernelCount(kernelCount), preemptionMode(preemptionMode), multiRootDeviceSyncNode(multiRootDeviceSyncNode) { + kernelCount(kernelCount), preemptionMode(preemptionMode) { UNRECOVERABLE_IF(nullptr == this->kernel); kernel->incRefInternal(); } @@ -165,9 +164,6 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term printfHandler->makeResident(commandStreamReceiver); } makeTimestampPacketsResident(commandStreamReceiver); - if (multiRootDeviceSyncNode != nullptr) { - commandStreamReceiver.makeResident(*multiRootDeviceSyncNode->getBaseGraphicsAllocation()); - } if (kernelOperation->blitPropertiesContainer.size() > 0) { CsrDependencies csrDeps; @@ -220,7 +216,7 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term false); // stateCacheInvalidation if (commandQueue.getContext().getRootDeviceIndices().size() > 1) { - eventsRequest.fillCsrDependenciesForRootDevices(dispatchFlags.csrDependencies, commandStreamReceiver); + eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver); } const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired(); @@ -313,7 +309,7 @@ TaskCountType 
CommandWithoutKernel::dispatchBlitOperation() { blitProperties.outputTimestampPacket = currentTimestampPacketNodes->peekNodes()[0]; if (commandQueue.getContext().getRootDeviceIndices().size() > 1) { - eventsRequest.fillCsrDependenciesForRootDevices(blitProperties.csrDependencies, *bcsCsr); + eventsRequest.fillCsrDependenciesForTaskCountContainer(blitProperties.csrDependencies, *bcsCsr); } const auto newTaskCount = bcsCsr->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice()); @@ -396,7 +392,7 @@ CompletionStamp &CommandWithoutKernel::submit(TaskCountType taskLevel, bool term false); // stateCacheInvalidation if (commandQueue.getContext().getRootDeviceIndices().size() > 1) { - eventsRequest.fillCsrDependenciesForRootDevices(dispatchFlags.csrDependencies, commandStreamReceiver); + eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver); } const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired(); diff --git a/opencl/source/helpers/task_information.h b/opencl/source/helpers/task_information.h index f7fb1c2aad..0eb3934b56 100644 --- a/opencl/source/helpers/task_information.h +++ b/opencl/source/helpers/task_information.h @@ -127,7 +127,7 @@ class CommandComputeKernel : public Command { public: CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector surfaces, bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr &&printfHandler, - PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount, TagNodeBase *multiRootDeviceSyncNode); + PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount); ~CommandComputeKernel() override; @@ -146,7 +146,6 @@ class CommandComputeKernel : public Command { Kernel *kernel; uint32_t kernelCount; PreemptionMode preemptionMode; - TagNodeBase *multiRootDeviceSyncNode; }; class CommandWithoutKernel : 
public Command { diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp index c369349555..fa064de0cd 100644 --- a/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp @@ -19,7 +19,6 @@ #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" -#include "opencl/test/unit_test/helpers/cl_hw_parse.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" @@ -194,7 +193,6 @@ class MockCommandStreamReceiverWithFailingFlushBatchedSubmission : public MockCo template struct MockCommandQueueHwWithOverwrittenCsr : public CommandQueueHw { using CommandQueueHw::CommandQueueHw; - using CommandQueueHw::timestampPacketContainer; MockCommandStreamReceiverWithFailingFlushBatchedSubmission *csr; CommandStreamReceiver &getGpgpuCommandStreamReceiver() const override { return *csr; } }; diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp index 962113dfd8..1e1a7dbff0 100644 --- a/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp @@ -22,7 +22,6 @@ #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" -#include "opencl/test/unit_test/mocks/mock_mdi.h" using namespace NEO; @@ -972,4 +971,4 @@ HWTEST_F(CommandQueueHwTest, GivenBuiltinKernelWhenBuiltinDispatchInfoBuilderIsP EXPECT_EQ(builder.paramsToUse.elws.x, dispatchInfo->getEnqueuedWorkgroupSize().x); EXPECT_EQ(builder.paramsToUse.offset.x, 
dispatchInfo->getOffset().x); EXPECT_EQ(builder.paramsToUse.kernel, dispatchInfo->getKernel()); -} \ No newline at end of file +} diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp index 82a1c38e12..3f51927119 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp @@ -31,7 +31,6 @@ #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" -#include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_program.h" diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp index 9ab8a1218e..b001e5879c 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp @@ -557,11 +557,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenTimestamp MockMultiDispatchInfo multiDispatchInfo(device.get(), std::vector({kernel1.mockKernel, kernel2.mockKernel})); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; - getCommandStream(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr); + getCommandStream(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false); size_t sizeWithDisabled = cmdQ.requestedCmdStreamSize; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; - getCommandStream(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr); + 
getCommandStream(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false); size_t sizeWithEnabled = cmdQ.requestedCmdStreamSize; size_t additionalSize = 0u; @@ -669,7 +669,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenAutoLocal EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer()); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, CsrDependencies(), false, false, - false, *cmdQ.get(), multiDispatchInfo, false, false, nullptr); + false, *cmdQ.get(), multiDispatchInfo, false, false); expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); @@ -738,7 +738,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlin EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer()); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, CsrDependencies(), false, false, - false, *cmdQ.get(), multiDispatchInfo, false, false, nullptr); + false, *cmdQ.get(), multiDispatchInfo, false, false); expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); diff --git a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp index 35eb00168f..edd2fb1601 100644 --- a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp @@ -234,7 +234,7 @@ HWTEST_F(EnqueueHandlerTest, givenNonBlitPropertyWhenEnqueueIsBlockedThenDontReg Surface *surfaces[] = {nullptr}; mockCmdQ->enqueueBlocked(CL_COMMAND_MARKER, surfaces, 
size_t(0), multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueuePropertiesForDependencyFlush, eventsRequest, - eventBuilder, std::unique_ptr(nullptr), nullptr, nullptr); + eventBuilder, std::unique_ptr(nullptr), nullptr); EXPECT_FALSE(blockedCommandsDataForDependencyFlush->blitEnqueue); } @@ -267,7 +267,7 @@ HWTEST_F(EnqueueHandlerTest, givenBlitPropertyWhenEnqueueIsBlockedThenRegisterBl Surface *surfaces[] = {nullptr}; mockCmdQ->enqueueBlocked(CL_COMMAND_READ_BUFFER, surfaces, size_t(0), multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueuePropertiesForBlitEnqueue, eventsRequest, - eventBuilder, std::unique_ptr(nullptr), mockCmdQ->getBcsForAuxTranslation(), nullptr); + eventBuilder, std::unique_ptr(nullptr), mockCmdQ->getBcsForAuxTranslation()); EXPECT_TRUE(blockedCommandsDataForBlitEnqueue->blitEnqueue); EXPECT_EQ(blitProperties.srcAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->srcAllocation); EXPECT_EQ(blitProperties.dstAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->dstAllocation); @@ -351,7 +351,7 @@ HWTEST_F(DispatchFlagsBlitTests, givenBlitEnqueueWhenDispatchingCommandsWithoutK timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag()); BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies, - eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false, nullptr); + eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); @@ -390,7 +390,7 @@ HWTEST_F(DispatchFlagsBlitTests, givenBlitOperationWhenEnqueueCommandWithoutKern CsrDependencies csrDeps; BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies, - eventsRequest, 
&mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false, nullptr); + eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); @@ -432,7 +432,7 @@ HWTEST_F(DispatchFlagsBlitTests, givenN1EnabledWhenDispatchingWithoutKernelThenA mockCmdQ->obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, true, bcsCsr); timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag()); BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies, - eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false, nullptr); + eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); @@ -478,7 +478,7 @@ HWTEST_F(DispatchFlagsTests, givenMockKernelWhenSettingAdditionalKernelExecInfoT std::vector v; pKernel->setAdditionalKernelExecInfo(123u); - std::unique_ptr cmd(new CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1, nullptr)); + std::unique_ptr cmd(new CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1)); cmd->submit(1u, false); EXPECT_EQ(mockCsr->passedDispatchFlags.additionalKernelExecInfo, 123u); diff --git a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests_dg2_and_later.cpp b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests_dg2_and_later.cpp index f3d47baf7a..f51f97a02b 100644 --- a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests_dg2_and_later.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests_dg2_and_later.cpp @@ 
-54,7 +54,7 @@ HWTEST2_F(DispatchFlagsTests, whenSubmittingKernelWithAdditionalKernelExecInfoTh std::vector v; pKernel->setAdditionalKernelExecInfo(AdditionalKernelExecInfo::DisableOverdispatch); - std::unique_ptr cmd(new CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1, nullptr)); + std::unique_ptr cmd(new CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1)); cmd->submit(1u, false); EXPECT_EQ(mockCsr->passedDispatchFlags.additionalKernelExecInfo, AdditionalKernelExecInfo::DisableOverdispatch); diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index 9515fc57d7..4745488d67 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -2003,10 +2003,10 @@ HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenEstimatingCommandStream dispatchInfo.setKernel(mockKernel.mockKernel); multiDispatchInfo.push(dispatchInfo); - auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false); DebugManager.flags.GpuScratchRegWriteAfterWalker.set(1); - auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false); EXPECT_EQ(baseCommandStreamSize + sizeof(typename 
FamilyType::MI_LOAD_REGISTER_IMM), extendedCommandStreamSize); } diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 39cf18a236..49871b6595 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -1014,8 +1014,8 @@ HWTEST_F(EnqueueKernelTest, givenTimestampWriteEnableWhenMarkerProfilingWithoutW dispatchInfo.setKernel(mockKernel.mockKernel); multiDispatchInfo.push(dispatchInfo); - auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr); - auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false, nullptr); + auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false); + auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false); EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO::size + MemorySynchronizationCommands::getSizeForSingleBarrier(false), extendedCommandStreamSize); } @@ -1033,8 +1033,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueKernelTest, givenTimestampWriteEnableOnMulti dispatchInfo.setKernel(mockKernel.mockKernel); multiDispatchInfo.push(dispatchInfo); - auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr); - auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false, nullptr); + auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, 
false, false, false, *pCmdQ, multiDispatchInfo, false, false); + auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false); EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO::size + ImplicitScalingDispatch::getBarrierSize(csr.peekHwInfo(), false, false), extendedCommandStreamSize); } @@ -1047,8 +1047,8 @@ HWTEST_F(EnqueueKernelTest, givenTimestampWriteEnableWhenMarkerProfilingWithWait dispatchInfo.setKernel(mockKernel.mockKernel); multiDispatchInfo.push(dispatchInfo); - auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr); - auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, true, nullptr); + auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false); + auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, true); EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO::size, extendedCommandStreamSize); } diff --git a/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp b/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp index f0d5895991..3587e5e578 100644 --- a/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp @@ -23,7 +23,6 @@ #include "opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h" -#include "opencl/test/unit_test/mocks/mock_event.h" using namespace NEO; @@ -97,7 +96,7 @@ 
HWTEST_F(GetSizeRequiredBufferTest, WhenFillingBufferThenHeapsAndCommandBufferCo auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_FILL_BUFFER, CsrDependencies(), false, false, - false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + false, *pCmdQ, multiDispatchInfo, false, false); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); @@ -150,7 +149,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenCopyingBufferThenHeapsAndCommandBufferCo auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_COPY_BUFFER, CsrDependencies(), false, false, - false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + false, *pCmdQ, multiDispatchInfo, false, false); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); @@ -204,7 +203,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenReadingBufferNonBlockingThenHeapsAndComm auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, CsrDependencies(), false, false, - false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + false, *pCmdQ, multiDispatchInfo, false, false); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); @@ -259,7 +258,7 @@ HWTEST_F(GetSizeRequiredBufferTest, 
WhenReadingBufferBlockingThenThenHeapsAndCom auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, CsrDependencies(), false, false, - false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + false, *pCmdQ, multiDispatchInfo, false, false); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); @@ -314,7 +313,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferNonBlockingThenHeapsAndComm auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, CsrDependencies(), false, false, - false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + false, *pCmdQ, multiDispatchInfo, false, false); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); @@ -366,7 +365,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferBlockingThenHeapsAndCommand auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, CsrDependencies(), false, false, - false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + false, *pCmdQ, multiDispatchInfo, false, false); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); @@ -381,68 +380,6 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferBlockingThenHeapsAndCommand EXPECT_GE(expectedSizeSSH, 
usedAfterSSH - usedBeforeSSH); } -HWTEST_F(GetSizeRequiredBufferTest, GivenOutEventForSingleDeviceContextWhenCalculatingCSSizeThenExtraPipeControlIsNotAdded) { - UltClDeviceFactory deviceFactory{1, 0}; - DebugManager.flags.EnableMultiRootDeviceContexts.set(true); - - cl_device_id devices[] = {deviceFactory.rootDevices[0]}; - - MockContext pContext(ClDeviceVector(devices, 1)); - MockKernelWithInternals mockKernel(*pContext.getDevices()[0]); - DispatchInfo dispatchInfo; - MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel); - dispatchInfo.setKernel(mockKernel.mockKernel); - multiDispatchInfo.push(dispatchInfo); - auto event = std::make_unique>(&pContext, nullptr, 0, 0, 0); - cl_event clEvent = event.get(); - auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr); - auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, &clEvent); - - EXPECT_EQ(baseCommandStreamSize, extendedCommandStreamSize); -} - -HWTEST_F(GetSizeRequiredBufferTest, GivenUserEventForMultiDeviceContextWhenCalculatingCSSizeThenExtraPipeControlIsNotAdded) { - UltClDeviceFactory deviceFactory{2, 0}; - DebugManager.flags.EnableMultiRootDeviceContexts.set(true); - - cl_device_id devices[] = {deviceFactory.rootDevices[0], - deviceFactory.rootDevices[1]}; - - MockContext pContext(ClDeviceVector(devices, 2)); - MockKernelWithInternals mockKernel(*pContext.getDevices()[0]); - DispatchInfo dispatchInfo; - MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel); - dispatchInfo.setKernel(mockKernel.mockKernel); - multiDispatchInfo.push(dispatchInfo); - auto userEvent1 = std::make_unique(&pContext); - cl_event clEvent = userEvent1.get(); - auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, 
nullptr); - auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, &clEvent); - - EXPECT_EQ(baseCommandStreamSize, extendedCommandStreamSize); -} - -HWTEST_F(GetSizeRequiredBufferTest, GivenOutEventForMultiDeviceContextWhenCalculatingCSSizeThenExtraPipeControlIsAdded) { - UltClDeviceFactory deviceFactory{2, 0}; - DebugManager.flags.EnableMultiRootDeviceContexts.set(true); - - cl_device_id devices[] = {deviceFactory.rootDevices[0], - deviceFactory.rootDevices[1]}; - - MockContext pContext(ClDeviceVector(devices, 2)); - MockKernelWithInternals mockKernel(*pContext.getDevices()[0]); - DispatchInfo dispatchInfo; - MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel); - dispatchInfo.setKernel(mockKernel.mockKernel); - multiDispatchInfo.push(dispatchInfo); - auto event = std::make_unique>(&pContext, nullptr, 0, 0, 0); - cl_event clEvent = event.get(); - auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr); - auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, &clEvent); - - EXPECT_EQ(baseCommandStreamSize + MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pContext.getDevices()[0]->getHardwareInfo(), false), extendedCommandStreamSize); -} - HWTEST_F(GetSizeRequiredBufferTest, givenMultipleKernelRequiringSshWhenTotalSizeIsComputedThenItIsProperlyAligned) { auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index bdfd14e7da..969ec67b0b 100644 --- 
a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -1904,7 +1904,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelWhenItIsUnblocke blockedCommandsData->setHeaps(dsh, ioh, ssh); std::vector surfaces; - event->setCommand(std::make_unique(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1, nullptr)); + event->setCommand(std::make_unique(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1)); event->submitCommand(false); EXPECT_EQ(numGrfRequired, csr->savedDispatchFlags.numGrfRequired); @@ -1949,7 +1949,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelWhenInitializeBc auto blockedCommandsData = std::make_unique(cmdStream, *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); std::vector surfaces; - event->setCommand(std::make_unique(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1, nullptr)); + event->setCommand(std::make_unique(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1)); event->submitCommand(false); EXPECT_FALSE(pCmdQ->isCsrLocked); } diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp index 16ca22431d..92b41058bb 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp @@ -7,7 +7,6 @@ #include "shared/source/command_stream/wait_status.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" -#include "shared/test/common/mocks/mock_timestamp_container.h" #include 
"shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/hw_test.h" @@ -15,7 +14,6 @@ #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" -#include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" @@ -47,18 +45,12 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMu MockGraphicsAllocation svmAlloc(svmPtr, svmSize); Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15); - auto node1 = event1.getMultiRootTimestampSyncNode(); Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20); - auto node3 = event3.getMultiRootTimestampSyncNode(); Event event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); - auto node4 = event4.getMultiRootTimestampSyncNode(); Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7); - auto node5 = event5.getMultiRootTimestampSyncNode(); UserEvent userEvent1(&pCmdQ1->getContext()); - userEvent1.getMultiRootTimestampSyncNode(); UserEvent userEvent2(&pCmdQ2->getContext()); - userEvent2.getMultiRootTimestampSyncNode(); userEvent1.setStatus(CL_COMPLETE); userEvent2.setStatus(CL_COMPLETE); @@ -95,12 +87,12 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMu EXPECT_EQ(2u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node4->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 
= genCmdCast(*(semaphores[1])); - EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node5->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress()); + EXPECT_EQ(7u, semaphoreCmd1->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); } { @@ -123,12 +115,12 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMu EXPECT_EQ(2u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node1->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); - EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node3->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress()); + EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); } alignedFree(svmPtr); } @@ -155,24 +147,17 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo cl_device_id devices[] = {device1, device2, device3}; auto context = std::make_unique(ClDeviceVector(devices, 3), false); - auto mockTagAllocator = std::make_unique>(context->getRootDeviceIndices(), device1->getExecutionEnvironment()->memoryManager.get(), 10u); - std::unique_ptr uniquePtr(mockTagAllocator.release()); - context->setMultiRootDeviceTimestampPacketAllocator(uniquePtr); + auto pCmdQ1 = context->getSpecialQueue(1u); auto pCmdQ2 = context->getSpecialQueue(2u); auto pCmdQ3 = context->getSpecialQueue(3u); 
Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15); - auto node1 = event1.getMultiRootTimestampSyncNode(); Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20); - auto node3 = event3.getMultiRootTimestampSyncNode(); Event event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); - auto node4 = event4.getMultiRootTimestampSyncNode(); Event event5(pCmdQ3, CL_COMMAND_NDRANGE_KERNEL, 7, 21); - auto node5 = event5.getMultiRootTimestampSyncNode(); Event event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7); - auto node6 = event6.getMultiRootTimestampSyncNode(); UserEvent userEvent1(&pCmdQ1->getContext()); UserEvent userEvent2(&pCmdQ2->getContext()); @@ -205,16 +190,16 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo EXPECT_EQ(3u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node4->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); - EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node5->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress()); + EXPECT_EQ(21u, semaphoreCmd1->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ3->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); auto semaphoreCmd2 = genCmdCast(*(semaphores[2])); - EXPECT_EQ(1u, semaphoreCmd2->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node6->getContextEndAddress(0u)), semaphoreCmd2->getSemaphoreGraphicsAddress()); + EXPECT_EQ(7u, semaphoreCmd2->getSemaphoreDataDword()); + 
EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress()); } { @@ -230,16 +215,16 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo EXPECT_EQ(3u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node1->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); - EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node3->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress()); + EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); auto semaphoreCmd2 = genCmdCast(*(semaphores[2])); - EXPECT_EQ(1u, semaphoreCmd2->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node5->getContextEndAddress(0u)), semaphoreCmd2->getSemaphoreGraphicsAddress()); + EXPECT_EQ(21u, semaphoreCmd2->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ3->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress()); } { @@ -264,8 +249,8 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo EXPECT_EQ(1u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node1->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword()); + 
EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); } } @@ -301,16 +286,11 @@ HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnviro using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15); - event1.getMultiRootTimestampSyncNode(); Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 1, 6); - event3.getMultiRootTimestampSyncNode(); Event event4(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20); - event4.getMultiRootTimestampSyncNode(); Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); - event5.getMultiRootTimestampSyncNode(); Event event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7); - event6.getMultiRootTimestampSyncNode(); UserEvent userEvent1(&pCmdQ1->getContext()); UserEvent userEvent2(&pCmdQ2->getContext()); @@ -336,10 +316,10 @@ HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnviro EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr); CsrDependencies csrDeps; - eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, pCmdQ1->getGpgpuCommandStreamReceiver()); + eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ1->getGpgpuCommandStreamReceiver()); - EXPECT_EQ(0u, csrDeps.multiRootTimeStampSyncContainer.size()); - EXPECT_EQ(0u, TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer(csrDeps)); + EXPECT_EQ(0u, csrDeps.taskCountContainer.size()); + EXPECT_EQ(0u, TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(csrDeps)); } { @@ -362,10 +342,10 @@ HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnviro EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr); CsrDependencies csrDeps; - eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, pCmdQ2->getGpgpuCommandStreamReceiver()); + 
eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ2->getGpgpuCommandStreamReceiver()); - EXPECT_EQ(3u, csrDeps.multiRootTimeStampSyncContainer.size()); - EXPECT_EQ(3u * sizeof(MI_SEMAPHORE_WAIT), TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer(csrDeps)); + EXPECT_EQ(3u, csrDeps.taskCountContainer.size()); + EXPECT_EQ(3u * sizeof(MI_SEMAPHORE_WAIT), TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(csrDeps)); } } @@ -396,7 +376,6 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW cl_event outputEvent2{}; - auto currentCsUsedCmdq1 = pCmdQ1->getCS(0).getUsed(); pCmdQ2->enqueueReadBuffer(buffer, CL_FALSE, 0, MemoryConstants::pageSize, hostPtr, nullptr, 1, &outputEvent1, @@ -420,12 +399,14 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW nullptr); { HardwareParse csHwParser; - csHwParser.parseCommands(pCmdQ1->getCS(0), currentCsUsedCmdq1); + csHwParser.parseCommands(pCmdQ1->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(0u, semaphores.size()); } userEvent1.setStatus(CL_COMPLETE); + event1->release(); + event2->release(); pCmdQ1->finish(); pCmdQ2->finish(); { @@ -436,7 +417,7 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW EXPECT_EQ(1u, semaphores.size()); auto semaphoreCmd = genCmdCast(*(semaphores[0])); EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(event2->getMultiRootDeviceTimestampPacketNodes()->peekNodes().at(0)->getContextEndAddress(0u)), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress()); } { HardwareParse csHwParser; @@ -445,11 +426,9 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW EXPECT_EQ(1u, semaphores.size()); auto semaphoreCmd = 
genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(event1->getMultiRootDeviceTimestampPacketNodes()->peekNodes().at(0)->getContextEndAddress(0u)), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(0u, semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress()); } - event1->release(); - event2->release(); buffer->release(); } @@ -479,14 +458,14 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW char hostPtr[MemoryConstants::pageSize]{}; cl_event outputEvent2{}; - auto currentCsUsed = pCmdQ1->getCS(0).getUsed(); + pCmdQ1->enqueueReadBuffer(buffer, CL_FALSE, 0, MemoryConstants::pageSize, hostPtr, nullptr, 1, &outputEvent1, &outputEvent2); { HardwareParse csHwParser; - csHwParser.parseCommands(pCmdQ1->getCS(0), currentCsUsed); + csHwParser.parseCommands(pCmdQ1->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(0u, semaphores.size()); @@ -503,7 +482,7 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW nullptr); { HardwareParse csHwParser; - csHwParser.parseCommands(pCmdQ1->getCS(0), currentCsUsed); + csHwParser.parseCommands(pCmdQ1->getCS(0)); auto semaphores = findAll(csHwParser.cmdList.begin(), csHwParser.cmdList.end()); EXPECT_EQ(0u, semaphores.size()); @@ -611,6 +590,9 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW EXPECT_EQ(0u, semaphores.size()); } userEvent1.setStatus(CL_COMPLETE); + event1->release(); + event2->release(); + event3->release(); pCmdQ1->finish(); pCmdQ2->finish(); @@ -622,8 +604,7 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW EXPECT_EQ(1u, semaphores.size()); auto semaphoreCmd = genCmdCast(*(semaphores[0])); EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); - auto node = 
event2->getMultiRootDeviceTimestampPacketNodes()->peekNodes().at(0); - EXPECT_EQ(node->getGpuAddress() + node->getContextEndOffset(), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress()); } { HardwareParse csHwParser; @@ -639,9 +620,8 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW EXPECT_EQ(2u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword()); - auto node = event1->getMultiRootDeviceTimestampPacketNodes()->peekNodes().at(0); - EXPECT_EQ(node->getGpuAddress() + node->getContextEndOffset(), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(0u, semaphoreCmd0->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); } { HardwareParse csHwParser; @@ -650,9 +630,6 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW EXPECT_LE(1u, semaphores.size()); } - event1->release(); - event2->release(); - event3->release(); buffer->release(); pCmdQ1->release(); pCmdQ2->release(); @@ -984,73 +961,3 @@ HWTEST_F(BcsCrossDeviceMigrationTests, givenBufferWithMultiStorageWhenEnqueueRea EXPECT_EQ(buffer.get(), cmdQueue->migrateMultiGraphicsAllocationsReceivedOperationParams.srcMemObj); } - -HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyDoNotHaveMultiRootSyncNodeThenCsrDepsDoesNotHaveAnyMultiRootSyncContainer) { - Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15); - Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); - Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 1, 6); - Event event4(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20); - Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); - Event event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7); - UserEvent 
userEvent1(&pCmdQ1->getContext()); - UserEvent userEvent2(&pCmdQ2->getContext()); - - userEvent1.setStatus(CL_COMPLETE); - userEvent2.setStatus(CL_COMPLETE); - { - cl_event eventWaitList[] = - { - &event1, - &event2, - &event3, - &event4, - &event5, - &event6, - &userEvent1, - }; - cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); - - EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr); - CsrDependencies csrDeps; - eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, pCmdQ2->getGpgpuCommandStreamReceiver()); - - EXPECT_EQ(0u, csrDeps.multiRootTimeStampSyncContainer.size()); - } -} -HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyDoNotHaveMultiRootSyncNodeContainersThenCsrDepsDoesNotHaveAnyMultiRootSyncContainer) { - - MockEvent event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15); - event1.multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer()); - MockEvent event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); - MockEvent event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20); - event3.multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer()); - MockEvent event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); - event4.multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer()); - MockEvent event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); - event5.multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer()); - MockEvent event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7); - event6.multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer()); - UserEvent userEvent1(&pCmdQ1->getContext()); - - userEvent1.setStatus(CL_COMPLETE); - - { - cl_event eventWaitList[] = - { - &event1, - &event2, - &event3, - &event4, - &event5, - &event6, - &userEvent1, - }; - cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); - - EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, 
nullptr); - CsrDependencies csrDeps; - eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, pCmdQ2->getGpgpuCommandStreamReceiver()); - - EXPECT_EQ(0u, csrDeps.multiRootTimeStampSyncContainer.size()); - } -} diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index 504b4a885d..12caf89da5 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -11,7 +11,6 @@ #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/logical_state_helper.h" -#include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp index c73bd35dec..33f481cb0d 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp @@ -1791,4 +1791,4 @@ HWTEST_F(BcsTests, givenHostPtrToImageWhenBlitBufferIsCalledThenBlitCmdIsFound) hwParser.parseCommands(csr.commandStream, 0); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), cmdIterator); -} \ No newline at end of file +} diff --git a/opencl/test/unit_test/event/event_builder_tests.cpp b/opencl/test/unit_test/event/event_builder_tests.cpp index ff0cda16de..234248a617 100644 --- a/opencl/test/unit_test/event/event_builder_tests.cpp +++ b/opencl/test/unit_test/event/event_builder_tests.cpp @@ -79,7 +79,7 @@ TEST(EventBuilder, 
givenVirtualEventWithCommandThenFinalizeAddChild) { public: using CommandComputeKernel::eventsWaitlist; MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, Kernel *kernel) - : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0, nullptr) {} + : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {} }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); @@ -129,7 +129,7 @@ TEST(EventBuilder, givenVirtualEventWithSubmittedCommandAsParentThenFinalizeNotA public: using CommandComputeKernel::eventsWaitlist; MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, Kernel *kernel) - : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0, nullptr) {} + : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {} }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp index 9ef77c60a3..4d58140ed5 100644 --- a/opencl/test/unit_test/event/event_tests.cpp +++ b/opencl/test/unit_test/event/event_tests.cpp @@ -486,7 +486,7 @@ TEST_F(InternalsEventTest, GivenSubmitCommandFalseWhenSubmittingCommandsThenRefA PreemptionMode preemptionMode = pDevice->getPreemptionMode(); v.push_back(bufferSurf); - auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1, nullptr); + auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1); 
event.setCommand(std::unique_ptr(cmd)); auto taskLevelBefore = csr.peekTaskLevel(); @@ -529,7 +529,7 @@ TEST_F(InternalsEventTest, GivenSubmitCommandTrueWhenSubmittingCommandsThenRefAp NullSurface *surface = new NullSurface; v.push_back(surface); PreemptionMode preemptionMode = pDevice->getPreemptionMode(); - auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1, nullptr); + auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1); event.setCommand(std::unique_ptr(cmd)); auto taskLevelBefore = csr.peekTaskLevel(); @@ -580,7 +580,7 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut std::vector v; PreemptionMode preemptionMode = pDevice->getPreemptionMode(); - auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1, nullptr); + auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1); event.setCommand(std::unique_ptr(cmd)); event.submitCommand(false); @@ -632,7 +632,7 @@ TEST_F(InternalsEventTest, givenGpuHangOnCmdQueueWaitFunctionAndBlockedKernelWit std::vector v; PreemptionMode preemptionMode = pDevice->getPreemptionMode(); - auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1, nullptr); + auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1); event.setCommand(std::unique_ptr(cmd)); event.submitCommand(false); @@ -681,7 +681,7 @@ TEST_F(InternalsEventTest, givenGpuHangOnPrintingEnqueueOutputAndBlockedKernelWi std::vector v; PreemptionMode preemptionMode = pDevice->getPreemptionMode(); - auto cmd = new 
CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1, nullptr); + auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1); event.setCommand(std::unique_ptr(cmd)); event.submitCommand(false); @@ -1170,7 +1170,7 @@ HWTEST_F(EventTest, givenVirtualEventWhenCommandSubmittedThenLockCsrOccurs) { public: using CommandComputeKernel::eventsWaitlist; MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, Kernel *kernel) - : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0, nullptr) {} + : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {} }; class MockEvent : public Event { public: @@ -1751,7 +1751,7 @@ HWTEST_F(InternalsEventTest, givenAbortedCommandWhenSubmitCalledThenDontUpdateFl blockedCommandsData->setHeaps(dsh, ioh, ssh); PreemptionMode preemptionMode = pDevice->getPreemptionMode(); std::vector v; - auto cmd = new CommandComputeKernel(*pCmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1, nullptr); + auto cmd = new CommandComputeKernel(*pCmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1); event->setCommand(std::unique_ptr(cmd)); FlushStamp expectedFlushStamp = 0; @@ -1894,35 +1894,3 @@ TEST(EventTimestampTest, givenEnableTimestampWaitWhenCheckIsTimestampWaitEnabled EXPECT_TRUE(event.isWaitForTimestampsEnabled()); } } -TEST(MultiRootEvent, givenContextWithMultiRootTagAllocatorWhenEventGetsTagThenNewAllocatorIsNotCreated) { - auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - MockContext context{}; - MockCommandQueue cmdQ(&context, mockDevice.get(), 0, false); - 
auto allocator = cmdQ.getGpgpuCommandStreamReceiver().createMultiRootDeviceTimestampPacketAllocator(context.getRootDeviceIndices()); - auto allocatorPtr = allocator.get(); - context.setMultiRootDeviceTimestampPacketAllocator(allocator); - MockEvent event{&cmdQ, CL_COMMAND_MARKER, 0, 0}; - event.getMultiRootTimestampSyncNode(); - EXPECT_EQ(allocatorPtr, context.getMultiRootDeviceTimestampPacketAllocator()); -} -TEST(MultiRootEvent, givenContextWithoutMultiRootTagAllocatorWhenEventGetsTagThenNewAllocatorIsCreated) { - auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - MockContext context{}; - MockCommandQueue cmdQ(&context, mockDevice.get(), 0, false); - EXPECT_EQ(context.getMultiRootDeviceTimestampPacketAllocator(), nullptr); - MockEvent event{&cmdQ, CL_COMMAND_MARKER, 0, 0}; - event.getMultiRootTimestampSyncNode(); - EXPECT_NE(context.getMultiRootDeviceTimestampPacketAllocator(), nullptr); -} -TEST(MultiRootEvent, givenEventWithTagWhenEventGetsNewTagThenNewTagContainerIsNotCreated) { - auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - MockContext context{}; - MockCommandQueue cmdQ(&context, mockDevice.get(), 0, false); - MockEvent event{&cmdQ, CL_COMMAND_MARKER, 0, 0}; - EXPECT_EQ(event.getMultiRootDeviceTimestampPacketNodes(), nullptr); - event.getMultiRootTimestampSyncNode(); - auto containerPtr = event.getMultiRootDeviceTimestampPacketNodes(); - EXPECT_NE(containerPtr, nullptr); - event.getMultiRootTimestampSyncNode(); - EXPECT_EQ(containerPtr, event.getMultiRootDeviceTimestampPacketNodes()); -} \ No newline at end of file diff --git a/opencl/test/unit_test/gtpin/gtpin_tests.cpp b/opencl/test/unit_test/gtpin/gtpin_tests.cpp index df7c3da23a..98e8f404d4 100644 --- a/opencl/test/unit_test/gtpin/gtpin_tests.cpp +++ b/opencl/test/unit_test/gtpin/gtpin_tests.cpp @@ -2435,7 +2435,7 @@ HWTEST_F(GTPinTests, 
givenGtPinInitializedWhenSubmittingKernelCommandThenFlushed gtpinNotifyKernelSubmit(kernel.mockMultiDeviceKernel, mockCmdQ.get()); - std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1, nullptr)); + std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1)); CompletionStamp stamp = command->submit(20, false); ASSERT_EQ(1u, kernelExecQueue.size()); diff --git a/opencl/test/unit_test/helpers/task_information_tests.cpp b/opencl/test/unit_test/helpers/task_information_tests.cpp index 302f090012..b4489548af 100644 --- a/opencl/test/unit_test/helpers/task_information_tests.cpp +++ b/opencl/test/unit_test/helpers/task_information_tests.cpp @@ -154,7 +154,7 @@ TEST(CommandTest, givenWaitlistRequestWhenCommandComputeKernelIsCreatedThenMakeL public: using CommandComputeKernel::eventsWaitlist; MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, Kernel *kernel) - : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0, nullptr) {} + : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {} }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); @@ -291,7 +291,7 @@ HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectD for (auto &surface : surfaces) { requiresCoherency |= surface->IsCoherent; } - std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1, nullptr)); + std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, 
kernel, 1)); command->submit(20, false); EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode); @@ -339,7 +339,7 @@ HWTEST_F(DispatchFlagsTests, givenClCommandCopyImageWhenSubmitThenFlushTextureCa for (auto &surface : surfaces) { requiresCoherency |= surface->IsCoherent; } - std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, commandType, nullptr, preemptionMode, kernel, 1, nullptr)); + std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, commandType, nullptr, preemptionMode, kernel, 1)); command->submit(20, false); EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode); @@ -425,7 +425,7 @@ HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectD bool flushDC = false; bool slmUsed = false; bool ndRangeKernel = false; - std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1, nullptr)); + std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1)); command->submit(20, false); EXPECT_TRUE(mockCsr->passedDispatchFlags.epilogueRequired); diff --git a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp index 5d1fdd51b3..1abdebb924 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp @@ -34,11 +34,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWriteEnabl auto mockCmdQHw = std::make_unique>(context, device.get(), nullptr); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; - getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, 
false, nullptr); + getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithDisabled = mockCmdQHw->requestedCmdStreamSize; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; - getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr); + getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize; auto extendedSize = sizeWithDisabled + sizeof(typename FamilyType::PIPE_CONTROL); @@ -52,7 +52,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndOoqWhenEstimat device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, - false, multiDispatchInfo, nullptr, 0, false, false, nullptr); + false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithDisabled = mockCmdQHw->requestedCmdStreamSize; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; @@ -82,7 +82,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndOoqWhenEstimat eventsRequest.fillCsrDependenciesForTimestampPacketContainer( csrDeps, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); - getCommandStream(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr); + getCommandStream(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize; size_t sizeForNodeDependency = 0; @@ -143,7 +143,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStr auto mockCmdQHw = std::make_unique>(context, device.get(), nullptr); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; - getCommandStream(*mockCmdQHw, 
CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr); + getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithDisabled = mockCmdQHw->requestedCmdStreamSize; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; @@ -172,7 +172,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStr CsrDependencies csrDeps; eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); - getCommandStream(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr); + getCommandStream(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize; size_t sizeForNodeDependency = 0; diff --git a/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp b/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp index cef27f5bdc..5bbdba6a64 100644 --- a/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp @@ -212,7 +212,7 @@ HWTEST2_F(KernelWithCacheFlushTests, givenCacheFlushRequiredWhenEstimatingThenAd { EXPECT_FALSE(mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ)); - initialSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false, nullptr); + initialSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false); } { @@ -226,7 +226,7 @@ HWTEST2_F(KernelWithCacheFlushTests, givenCacheFlushRequiredWhenEstimatingThenAd ultCsr.multiOsContextCapable = false; 
EXPECT_TRUE(mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ)); - sizeWithCacheFlush = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false, nullptr); + sizeWithCacheFlush = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false); } EXPECT_EQ(initialSize + expectedDiff, sizeWithCacheFlush); diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 8c6b3d2b45..4d972e4d06 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -504,7 +504,6 @@ class CommandStreamReceiverMock : public CommandStreamReceiver { using BaseClass::CommandStreamReceiver; TagAllocatorBase *getTimestampPacketAllocator() override { return nullptr; } - std::unique_ptr createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) override { return std::unique_ptr(nullptr); } SubmissionStatus flushTagUpdate() override { return SubmissionStatus::SUCCESS; }; void updateTagFromWait() override{}; diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index 039762476a..54d7a035ad 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -767,11 +767,11 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBufferOperationWithoutKernelWhenEstimati auto &hwInfo = cmdQ->getDevice().getHardwareInfo(); auto readBufferCmdsSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, csrDependencies, false, false, - true, *cmdQ, multiDispatchInfo, false, false, nullptr); + true, *cmdQ, multiDispatchInfo, false, false); auto writeBufferCmdsSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, csrDependencies, false, false, - true, *cmdQ, 
multiDispatchInfo, false, false, nullptr); + true, *cmdQ, multiDispatchInfo, false, false); auto copyBufferCmdsSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_COPY_BUFFER, csrDependencies, false, false, - true, *cmdQ, multiDispatchInfo, false, false, nullptr); + true, *cmdQ, multiDispatchInfo, false, false); auto expectedSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); if (cmdQ->isCacheFlushForBcsRequired()) { diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index 6a501e2546..553b0f9397 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -249,7 +249,6 @@ class MockCommandQueueHw : public CommandQueueHw { using BaseClass::latestSentEnqueueType; using BaseClass::obtainCommandStream; using BaseClass::obtainNewTimestampPacketNodes; - using BaseClass::processDispatchForKernels; using BaseClass::requiresCacheFlushAfterWalker; using BaseClass::throttle; using BaseClass::timestampPacketContainer; diff --git a/opencl/test/unit_test/mocks/mock_context.cpp b/opencl/test/unit_test/mocks/mock_context.cpp index f4e7701939..7e91408674 100644 --- a/opencl/test/unit_test/mocks/mock_context.cpp +++ b/opencl/test/unit_test/mocks/mock_context.cpp @@ -116,6 +116,10 @@ void MockContext::initializeWithDevices(const ClDeviceVector &devices, bool noSp if (pDevice->getRootDeviceIndex() == rootDeviceIndex) { deviceBitfield |= pDevice->getDeviceBitfield(); } + for (auto &engine : pDevice->getDevice().getAllEngines()) { + if (engine.commandStreamReceiver->getTagsMultiAllocation()) + engine.commandStreamReceiver->ensureTagAllocationForRootDeviceIndex(rootDeviceIndex); + } } deviceBitfields.insert({rootDeviceIndex, deviceBitfield}); } diff --git a/opencl/test/unit_test/mocks/mock_event.h b/opencl/test/unit_test/mocks/mock_event.h index 6636dd9362..bde34c8904 100644 --- a/opencl/test/unit_test/mocks/mock_event.h +++ 
b/opencl/test/unit_test/mocks/mock_event.h @@ -39,7 +39,6 @@ struct MockEvent : public BaseEventType { using Event::calculateSubmitTimestampData; using Event::isWaitForTimestampsEnabled; using Event::magic; - using Event::multiRootDeviceTimestampPacketContainer; using Event::queueTimeStamp; using Event::submitTimeStamp; using Event::timestampPacketContainer; diff --git a/opencl/test/unit_test/profiling/profiling_tests.cpp b/opencl/test/unit_test/profiling/profiling_tests.cpp index 7dec14a546..92beea833f 100644 --- a/opencl/test/unit_test/profiling/profiling_tests.cpp +++ b/opencl/test/unit_test/profiling/profiling_tests.cpp @@ -71,13 +71,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndFor MultiDispatchInfo multiDispatchInfo(&kernel); auto &commandStreamNDRangeKernel = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, - multiDispatchInfo, nullptr, 0, false, false, nullptr); + multiDispatchInfo, nullptr, 0, false, false); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, true, false, *pCmdQ, &kernel, {}); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamNDRangeKernel.getAvailableSpace(), requiredSize); auto &commandStreamTask = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, - multiDispatchInfo, nullptr, 0, false, false, nullptr); + multiDispatchInfo, nullptr, 0, false, false); expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_TASK, true, false, *pCmdQ, &kernel, {}); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize); @@ -93,13 +93,13 @@ HWTEST_F(ProfilingTests, GivenCommandQueueWithProfilingAndForWorkloadWithNoKerne MultiDispatchInfo multiDispatchInfo(nullptr); auto &commandStreamMigrateMemObjects = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, - multiDispatchInfo, nullptr, 0, false, false, nullptr); + multiDispatchInfo, nullptr, 0, false, false); auto expectedSizeCS = 
EnqueueOperation::getSizeRequiredCS(CL_COMMAND_MIGRATE_MEM_OBJECTS, true, false, *pCmdQ, nullptr, {}); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamMigrateMemObjects.getAvailableSpace(), requiredSize); auto &commandStreamMarker = getCommandStream(*pCmdQ, CsrDependencies(), true, - false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr); + false, false, multiDispatchInfo, nullptr, 0, false, false); expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_MARKER, true, false, *pCmdQ, nullptr, {}); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamMarker.getAvailableSpace(), requiredSize); @@ -121,9 +121,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndFor multiDispatchInfo.push(dispatchInfo); multiDispatchInfo.push(dispatchInfo); auto &commandStreamTask = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, - multiDispatchInfo, nullptr, 0, false, false, nullptr); + multiDispatchInfo, nullptr, 0, false, false); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_TASK, CsrDependencies(), true, false, - false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + false, *pCmdQ, multiDispatchInfo, false, false); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize); } @@ -741,13 +741,13 @@ HWTEST_F(ProfilingWithPerfCountersTests, GivenCommandQueueWithProfilingPerfCount MultiDispatchInfo multiDispatchInfo(nullptr); auto &commandStreamMigrateMemObjects = getCommandStream(*pCmdQ, CsrDependencies(), true, true, false, multiDispatchInfo, - nullptr, 0, false, false, nullptr); + nullptr, 0, false, false); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_MIGRATE_MEM_OBJECTS, true, true, *pCmdQ, nullptr, {}); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamMigrateMemObjects.getAvailableSpace(), requiredSize); auto &commandStreamMarker = getCommandStream(*pCmdQ, CsrDependencies(), 
true, true, false, - multiDispatchInfo, nullptr, 0, false, false, nullptr); + multiDispatchInfo, nullptr, 0, false, false); expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_MARKER, true, true, *pCmdQ, nullptr, {}); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamMarker.getAvailableSpace(), requiredSize); diff --git a/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp b/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp index 1d71c71ad1..4d23062f48 100644 --- a/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp +++ b/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp @@ -243,14 +243,10 @@ PVCTEST_F(PvcMultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEvent MockGraphicsAllocation svmAlloc(svmPtr, svmSize); Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15); - auto node1 = event1.getMultiRootTimestampSyncNode(); Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20); - auto node3 = event3.getMultiRootTimestampSyncNode(); Event event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); - auto node4 = event4.getMultiRootTimestampSyncNode(); Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7); - auto node5 = event5.getMultiRootTimestampSyncNode(); UserEvent userEvent1(&pCmdQ1->getContext()); UserEvent userEvent2(&pCmdQ2->getContext()); @@ -289,12 +285,12 @@ PVCTEST_F(PvcMultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEvent EXPECT_EQ(3u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node4->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), 
semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); - EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node5->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress()); + EXPECT_EQ(7u, semaphoreCmd1->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); } { @@ -317,12 +313,12 @@ PVCTEST_F(PvcMultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEvent EXPECT_EQ(3u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node1->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); - EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node3->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress()); + EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); } alignedFree(svmPtr); } diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index edebd23625..a1acb1a642 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -221,7 +221,6 @@ class CommandStreamReceiver { TagAllocatorBase *getEventTsAllocator(); TagAllocatorBase *getEventPerfCountAllocator(const uint32_t tagSize); virtual TagAllocatorBase *getTimestampPacketAllocator() = 0; - virtual std::unique_ptr 
createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) = 0; virtual bool expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation); diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index 40699214f6..9da3c83fb2 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -130,7 +130,6 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { GraphicsAllocation *getClearColorAllocation() override; TagAllocatorBase *getTimestampPacketAllocator() override; - std::unique_ptr createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) override; void postInitFlagsSetup() override; void programActivePartitionConfig(LinearStream &csr); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index f41487055e..847dd2f8b5 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -407,7 +407,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( auto commandStreamStartCSR = commandStreamCSR.getUsed(); TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer(commandStreamCSR, dispatchFlags.csrDependencies); - TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer(commandStreamCSR, dispatchFlags.csrDependencies); + TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer(commandStreamCSR, dispatchFlags.csrDependencies); programActivePartitionConfigFlushTask(commandStreamCSR); programEngineModeCommands(commandStreamCSR, dispatchFlags); @@ -980,7 +980,7 @@ size_t CommandStreamReceiverHw::getRequiredCmdStreamSize(const Dispat } size += 
TimestampPacketHelper::getRequiredCmdStreamSize(dispatchFlags.csrDependencies); - size += TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer(dispatchFlags.csrDependencies); + size += TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(dispatchFlags.csrDependencies); size += EncodeKernelArgsBuffer::getKernelArgsBufferCmdsSize(kernelArgsBufferAllocation, logicalStateHelper.get()); @@ -1196,7 +1196,7 @@ TaskCountType CommandStreamReceiverHw::flushBcsTask(const BlitPropert for (auto &blitProperties : blitPropertiesContainer) { TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer(commandStream, blitProperties.csrDependencies); - TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer(commandStream, blitProperties.csrDependencies); + TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer(commandStream, blitProperties.csrDependencies); BlitCommandsHelper::encodeWa(commandStream, blitProperties, latestSentBcsWaValue); @@ -1229,12 +1229,6 @@ TaskCountType CommandStreamReceiverHw::flushBcsTask(const BlitPropert if (blitProperties.clearColorAllocation) { makeResident(*blitProperties.clearColorAllocation); } - if (blitProperties.multiRootDeviceEventSync != nullptr) { - MiFlushArgs args; - args.commandWithPostSync = true; - args.notifyEnable = isUsedNotifyEnableForPostSync(); - EncodeMiFlushDW::programMiFlushDw(commandStream, blitProperties.multiRootDeviceEventSync->getGpuAddress() + blitProperties.multiRootDeviceEventSync->getContextEndOffset(), std::numeric_limits::max(), args, hwInfo); - } } BlitCommandsHelper::programGlobalSequencerFlush(commandStream); @@ -1251,6 +1245,7 @@ TaskCountType CommandStreamReceiverHw::flushBcsTask(const BlitPropert MemorySynchronizationCommands::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), false, peekHwInfo()); } + if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnBlitCopy.get(), 
taskCount, PauseOnGpuProperties::PauseMode::AfterWorkload)) { BlitCommandsHelper::dispatchDebugPauseCommands(commandStream, getDebugPauseStateGPUAddress(), DebugPauseState::waitingForUserEndConfirmation, @@ -1527,11 +1522,6 @@ TagAllocatorBase *CommandStreamReceiverHw::getTimestampPacketAllocato return timestampPacketAllocator.get(); } -template -std::unique_ptr CommandStreamReceiverHw::createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) { - auto &gfxCoreHelper = getGfxCoreHelper(); - return gfxCoreHelper.createTimestampPacketAllocator(rootDeviceIndices, getMemoryManager(), getPreferredTagPoolSize(), getType(), osContext->getDeviceBitfield()); -} template void CommandStreamReceiverHw::postInitFlagsSetup() { useNewResourceImplicitFlush = checkPlatformSupportsNewResourceImplicitFlush(); diff --git a/shared/source/command_stream/csr_deps.h b/shared/source/command_stream/csr_deps.h index 1e1b8d0130..eab7f8d115 100644 --- a/shared/source/command_stream/csr_deps.h +++ b/shared/source/command_stream/csr_deps.h @@ -22,7 +22,7 @@ class CsrDependencies { All }; - StackVec multiRootTimeStampSyncContainer; + StackVec, 32> taskCountContainer; StackVec timestampPacketContainer; void makeResident(CommandStreamReceiver &commandStreamReceiver) const; diff --git a/shared/source/helpers/blit_commands_helper.cpp b/shared/source/helpers/blit_commands_helper.cpp index 67fa909e65..3312532c07 100644 --- a/shared/source/helpers/blit_commands_helper.cpp +++ b/shared/source/helpers/blit_commands_helper.cpp @@ -50,7 +50,6 @@ BlitProperties BlitProperties::constructPropertiesForReadWrite(BlitterConstants: BlitterConstants::BlitDirection::HostPtrToImage == blitDirection) { return { nullptr, // outputTimestampPacket - nullptr, // multiRootDeviceEventSync blitDirection, // blitDirection {}, // csrDependencies AuxTranslationDirection::None, // auxTranslationDirection @@ -74,7 +73,6 @@ BlitProperties 
BlitProperties::constructPropertiesForReadWrite(BlitterConstants: } else { return { nullptr, // outputTimestampPacket - nullptr, // multiRootDeviceEventSync blitDirection, // blitDirection {}, // csrDependencies AuxTranslationDirection::None, // auxTranslationDirection @@ -106,7 +104,6 @@ BlitProperties BlitProperties::constructPropertiesForCopy(GraphicsAllocation *ds return { nullptr, // outputTimestampPacket - nullptr, // multiRootDeviceEventSync BlitterConstants::BlitDirection::BufferToBuffer, // blitDirection {}, // csrDependencies AuxTranslationDirection::None, // auxTranslationDirection @@ -131,7 +128,6 @@ BlitProperties BlitProperties::constructPropertiesForAuxTranslation(AuxTranslati auto allocationSize = allocation->getUnderlyingBufferSize(); return { nullptr, // outputTimestampPacket - nullptr, // multiRootDeviceEventSync BlitterConstants::BlitDirection::BufferToBuffer, // blitDirection {}, // csrDependencies auxTranslationDirection, // auxTranslationDirection diff --git a/shared/source/helpers/blit_commands_helper.h b/shared/source/helpers/blit_commands_helper.h index 8302f2bc08..4827e0cef1 100644 --- a/shared/source/helpers/blit_commands_helper.h +++ b/shared/source/helpers/blit_commands_helper.h @@ -61,7 +61,6 @@ struct BlitProperties { CommandStreamReceiver &gpguCsr, CommandStreamReceiver &bcsCsr); TagNodeBase *outputTimestampPacket = nullptr; - TagNodeBase *multiRootDeviceEventSync = nullptr; BlitterConstants::BlitDirection blitDirection = BlitterConstants::BlitDirection::BufferToHostPtr; CsrDependencies csrDependencies; AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None; diff --git a/shared/source/helpers/blit_commands_helper_base.inl b/shared/source/helpers/blit_commands_helper_base.inl index d310dcc425..bef912ec91 100644 --- a/shared/source/helpers/blit_commands_helper_base.inl +++ b/shared/source/helpers/blit_commands_helper_base.inl @@ -127,7 +127,7 @@ size_t BlitCommandsHelper::estimateBlitCommandSize(const Vec3 
sizePerBlit += estimatePostBlitCommandSize(); return TimestampPacketHelper::getRequiredCmdStreamSize(csrDependencies) + - TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer(csrDependencies) + + TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(csrDependencies) + (sizePerBlit * nBlits) + timestampCmdSize + estimatePreBlitCommandSize(); @@ -143,9 +143,6 @@ size_t BlitCommandsHelper::estimateBlitCommandsSize(const BlitPropert auto isImage = blitProperties.isImageOperation(); size += BlitCommandsHelper::estimateBlitCommandSize(blitProperties.copySize, blitProperties.csrDependencies, updateTimestampPacket, profilingEnabled, isImage, rootDeviceEnvironment, blitProperties.isSystemMemoryPoolUsed); - if (blitProperties.multiRootDeviceEventSync != nullptr) { - size += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); - } } size += BlitCommandsHelper::getWaCmdsSize(blitPropertiesContainer); size += 2 * MemorySynchronizationCommands::getSizeForAdditonalSynchronization(*rootDeviceEnvironment.getHardwareInfo()); diff --git a/shared/source/helpers/timestamp_packet.h b/shared/source/helpers/timestamp_packet.h index da7e68d65e..a3b67f50c0 100644 --- a/shared/source/helpers/timestamp_packet.h +++ b/shared/source/helpers/timestamp_packet.h @@ -144,11 +144,17 @@ struct TimestampPacketHelper { } template - static void programCsrDependenciesForForMultiRootDeviceSyncContainer(LinearStream &cmdStream, const CsrDependencies &csrDependencies) { - for (auto timestampPacketContainer : csrDependencies.multiRootTimeStampSyncContainer) { - for (auto &node : timestampPacketContainer->peekNodes()) { - TimestampPacketHelper::programSemaphore(cmdStream, *node); - } + static void programCsrDependenciesForForTaskCountContainer(LinearStream &cmdStream, const CsrDependencies &csrDependencies) { + auto &taskCountContainer = csrDependencies.taskCountContainer; + + for (auto &[taskCountPreviousRootDevice, tagAddressPreviousRootDevice] : 
taskCountContainer) { + using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; + using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; + + EncodeSempahore::addMiSemaphoreWaitCommand(cmdStream, + static_cast(tagAddressPreviousRootDevice), + static_cast(taskCountPreviousRootDevice), + COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); } } @@ -211,8 +217,8 @@ struct TimestampPacketHelper { } template - static size_t getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer(const CsrDependencies &csrDependencies) { - return csrDependencies.multiRootTimeStampSyncContainer.size() * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT); + static size_t getRequiredCmdStreamSizeForTaskCountContainer(const CsrDependencies &csrDependencies) { + return csrDependencies.taskCountContainer.size() * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT); } }; diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index 40564db786..b413a2140f 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -18,7 +18,6 @@ #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/os_interface/os_context.h" -#include "shared/source/utilities/tag_allocator.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include @@ -95,7 +94,6 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { }; TagAllocatorBase *getTimestampPacketAllocator() override { return nullptr; } - std::unique_ptr createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) override { return std::unique_ptr(nullptr); } CompletionStamp flushTask( LinearStream &commandStream, diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp 
b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 4c16dca5db..795dd5d450 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -15,13 +15,10 @@ #include "shared/source/helpers/api_specific_config.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/surface.h" -#include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/utilities/tag_allocator.h" -#include "shared/test/common/cmd_parse/gen_cmd_parse.h" -#include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/fixtures/command_stream_receiver_fixture.inl" #include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/helpers/batch_buffer_helper.h" @@ -36,7 +33,6 @@ #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_internal_allocation_storage.h" #include "shared/test/common/mocks/mock_memory_manager.h" -#include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/hw_test.h" #include "shared/test/common/test_macros/test_checks_shared.h" @@ -2481,89 +2477,3 @@ HWTEST_F(CommandStreamReceiverHwTest, givenVariousCsrModeWhenGettingTbxModeThenE ultCsr.commandStreamReceiverType = CommandStreamReceiverType::CSR_TBX_WITH_AUB; EXPECT_TRUE(ultCsr.isTbxMode()); } - -HWTEST_F(CommandStreamReceiverHwTest, GivenTwoRootDevicesWhengetMultiRootDeviceTimestampPacketAllocatorCalledThenAllocatorForTwoDevicesCreated) { - auto executionEnvironment = std::make_unique(defaultHwInfo.get(), true, 2u); - auto devices = 
DeviceFactory::createDevices(*executionEnvironment.release()); - const RootDeviceIndicesContainer indices = {0u, 1u}; - auto csr = devices[0]->getDefaultEngine().commandStreamReceiver; - auto allocator = csr->createMultiRootDeviceTimestampPacketAllocator(indices); - class MockTagAllocatorBase : public TagAllocatorBase { - public: - using TagAllocatorBase::maxRootDeviceIndex; - }; - EXPECT_EQ(reinterpret_cast(allocator.get())->maxRootDeviceIndex, 1u); -} -HWTEST_F(CommandStreamReceiverHwTest, GivenFiveRootDevicesWhengetMultiRootDeviceTimestampPacketAllocatorCalledThenAllocatorForFiveDevicesCreated) { - auto executionEnvironment = std::make_unique(defaultHwInfo.get(), true, 4u); - auto devices = DeviceFactory::createDevices(*executionEnvironment.release()); - const RootDeviceIndicesContainer indices = {0u, 1u, 2u, 3u}; - auto csr = devices[0]->getDefaultEngine().commandStreamReceiver; - auto allocator = csr->createMultiRootDeviceTimestampPacketAllocator(indices); - class MockTagAllocatorBase : public TagAllocatorBase { - public: - using TagAllocatorBase::maxRootDeviceIndex; - }; - EXPECT_EQ(reinterpret_cast(allocator.get())->maxRootDeviceIndex, 3u); -} -HWTEST_F(CommandStreamReceiverHwTest, givenMultiRootDeviceSyncNodeWhenFlushBcsTAskThenMiFlushAdded) { - using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; - auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - auto mockTagAllocator = std::make_unique>(pDevice->getRootDeviceIndex(), pDevice->getExecutionEnvironment()->memoryManager.get(), 10u); - - auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr, - commandStreamReceiver, commandStreamReceiver.getTagAllocation(), nullptr, - commandStreamReceiver.getTagAllocation()->getUnderlyingBuffer(), - commandStreamReceiver.getTagAllocation()->getGpuAddress(), 0, - 0, 0, 0, 0, 0, 0, 0); - auto tag = mockTagAllocator->getTag(); - blitProperties.multiRootDeviceEventSync = tag; - - 
BlitPropertiesContainer container; - container.push_back(blitProperties); - commandStreamReceiver.flushBcsTask(container, true, false, *pDevice); - HardwareParse hwParser; - hwParser.parseCommands(commandStreamReceiver.commandStream, 0); - - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - bool nodeAddressFound = false; - while (cmdIterator != hwParser.cmdList.end()) { - auto flush = genCmdCast(*cmdIterator); - if (flush->getDestinationAddress() == tag->getGpuAddress() + tag->getContextEndOffset()) { - nodeAddressFound = true; - break; - } - cmdIterator = find(++cmdIterator, hwParser.cmdList.end()); - } - EXPECT_TRUE(nodeAddressFound); -} -HWTEST_F(CommandStreamReceiverHwTest, givenNullPtrAsMultiRootDeviceSyncNodeWhenFlushBcsTAskThenMiFlushNotAdded) { - using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; - auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - auto mockTagAllocator = std::make_unique>(pDevice->getRootDeviceIndex(), pDevice->getExecutionEnvironment()->memoryManager.get(), 10u); - - auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr, - commandStreamReceiver, commandStreamReceiver.getTagAllocation(), nullptr, - commandStreamReceiver.getTagAllocation()->getUnderlyingBuffer(), - commandStreamReceiver.getTagAllocation()->getGpuAddress(), 0, - 0, 0, 0, 0, 0, 0, 0); - auto tag = mockTagAllocator->getTag(); - - BlitPropertiesContainer container; - container.push_back(blitProperties); - commandStreamReceiver.flushBcsTask(container, true, false, *pDevice); - HardwareParse hwParser; - hwParser.parseCommands(commandStreamReceiver.commandStream, 0); - - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - bool nodeAddressFound = false; - while (cmdIterator != hwParser.cmdList.end()) { - auto flush = genCmdCast(*cmdIterator); - if (flush->getDestinationAddress() == tag->getGpuAddress() + tag->getContextEndOffset()) { - 
nodeAddressFound = true; - break; - } - cmdIterator = find(++cmdIterator, hwParser.cmdList.end()); - } - EXPECT_FALSE(nodeAddressFound); -} diff --git a/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp b/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp index 9f862a1a5f..2675b9b744 100644 --- a/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp +++ b/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp @@ -16,7 +16,6 @@ #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" -#include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test_checks_shared.h" @@ -665,25 +664,3 @@ HWTEST2_F(BlitTests, givenPlatformWhenCallingDispatchPreBlitCommandThenNoneMiFlu auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_EQ(hwParser.cmdList.end(), cmdIterator); } - -HWTEST_F(BlitTests, givenPlatformWhenCallingDispatchPreBlitCommandThenNoneMiFlushDwIsProgramed) { - auto mockTagAllocator = std::make_unique>(pDevice->getRootDeviceIndex(), pDevice->getExecutionEnvironment()->memoryManager.get(), 10u); - auto tag = mockTagAllocator->getTag(); - BlitProperties blitProperties{}; - blitProperties.copySize = {1, 1, 1}; - BlitPropertiesContainer blitPropertiesContainer1; - blitPropertiesContainer1.push_back(blitProperties); - blitPropertiesContainer1.push_back(blitProperties); - blitPropertiesContainer1.push_back(blitProperties); - - auto estimatedSizeWithoutNode = BlitCommandsHelper::estimateBlitCommandsSize( - blitPropertiesContainer1, false, true, false, pDevice->getRootDeviceEnvironment()); - blitProperties.multiRootDeviceEventSync = tag; - BlitPropertiesContainer blitPropertiesContainer2; - blitPropertiesContainer2.push_back(blitProperties); - blitPropertiesContainer2.push_back(blitProperties); - 
blitPropertiesContainer2.push_back(blitProperties); - auto estimatedSizeWithNode = BlitCommandsHelper::estimateBlitCommandsSize( - blitPropertiesContainer2, false, true, false, pDevice->getRootDeviceEnvironment()); - EXPECT_NE(estimatedSizeWithoutNode, estimatedSizeWithNode); -} \ No newline at end of file diff --git a/shared/test/unit_test/helpers/timestamp_packet_tests.cpp b/shared/test/unit_test/helpers/timestamp_packet_tests.cpp index f3f4e14904..8ea1f939fa 100644 --- a/shared/test/unit_test/helpers/timestamp_packet_tests.cpp +++ b/shared/test/unit_test/helpers/timestamp_packet_tests.cpp @@ -303,35 +303,3 @@ HWTEST_F(DeviceTimestampPacketTests, givenDebugFlagSetWhenCreatingTimestampPacke EXPECT_FALSE(tag->canBeReleased()); } - -using TimestampPacketHelperTests = Test; - -HWTEST_F(TimestampPacketHelperTests, givenTagNodesInMultiRootSyncContainerWhenProgramingDependensiecThenSemaforesAreProgrammed) { - StackVec buffer(4096); - LinearStream cmdStream(buffer.begin(), buffer.size()); - CsrDependencies deps; - auto mockTagAllocator = std::make_unique>(0, pDevice->getMemoryManager()); - TimestampPacketContainer container = {}; - container.add(mockTagAllocator->getTag()); - deps.multiRootTimeStampSyncContainer.push_back(&container); - TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer(cmdStream, deps); - EXPECT_EQ(cmdStream.getUsed(), sizeof(typename FamilyType::MI_SEMAPHORE_WAIT)); -} - -HWTEST_F(TimestampPacketHelperTests, givenEmptyContainerMultiRootSyncContainerWhenProgramingDependensiecThenZeroSemaforesAreProgrammed) { - StackVec buffer(4096); - LinearStream cmdStream(buffer.begin(), buffer.size()); - CsrDependencies deps; - TimestampPacketContainer container = {}; - deps.multiRootTimeStampSyncContainer.push_back(&container); - TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer(cmdStream, deps); - EXPECT_EQ(cmdStream.getUsed(), 0u); -} - -HWTEST_F(TimestampPacketHelperTests, 
givenEmptyMultiRootSyncContainerWhenProgramingDependensiecThenZeroSemaforesAreProgrammed) { - StackVec buffer(4096); - LinearStream cmdStream(buffer.begin(), buffer.size()); - CsrDependencies deps; - TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer(cmdStream, deps); - EXPECT_EQ(cmdStream.getUsed(), 0u); -} \ No newline at end of file