From 876de37b92a003b56781477f20dacc9446f268f9 Mon Sep 17 00:00:00 2001 From: Compute-Runtime-Validation Date: Sat, 24 Dec 2022 18:25:41 +0100 Subject: [PATCH] Revert "Feature(OCL) Use tag nodes for root device synchronization" This reverts commit 547d1c37b3683cbc6cb63e6b1bde10c07f3ea4a4. Signed-off-by: Compute-Runtime-Validation --- opencl/source/command_queue/CMakeLists.txt | 1 - .../source/command_queue/command_queue_hw.h | 7 +- .../command_queue_hw_xehp_and_later.inl | 2 +- ..._process_dispatch_for_kernels_instance.inl | 16 -- opencl/source/command_queue/enqueue_common.h | 69 +---- opencl/source/command_queue/gpgpu_walker.h | 6 +- .../command_queue/gpgpu_walker_base.inl | 12 +- .../source/command_queue/hardware_interface.h | 1 - .../command_queue/hardware_interface_base.inl | 14 - opencl/source/context/context.cpp | 14 +- opencl/source/context/context.h | 6 - opencl/source/event/event.cpp | 20 +- opencl/source/event/event.h | 4 - opencl/source/gen11/command_queue_gen11.cpp | 2 +- .../source/gen12lp/command_queue_gen12lp.cpp | 5 +- opencl/source/gen8/command_queue_gen8.cpp | 2 +- opencl/source/gen9/command_queue_gen9.cpp | 4 +- opencl/source/helpers/properties_helper.cpp | 15 +- opencl/source/helpers/properties_helper.h | 2 +- opencl/source/helpers/task_information.cpp | 14 +- opencl/source/helpers/task_information.h | 3 +- .../xe_hp_core/command_queue_xe_hp_core.cpp | 1 + .../xe_hpc_core/command_queue_xe_hpc_core.cpp | 1 - .../xe_hpg_core/command_queue_xe_hpg_core.cpp | 1 - .../command_queue_hw_1_tests.cpp | 239 ------------------ .../command_queue_hw_2_tests.cpp | 92 +------ .../command_queue/dispatch_walker_tests.cpp | 1 - .../dispatch_walker_tests_xehp_and_later.cpp | 8 +- .../enqueue_command_without_kernel_tests.cpp | 10 +- ...and_without_kernel_tests_dg2_and_later.cpp | 2 +- .../command_queue/enqueue_kernel_1_tests.cpp | 4 +- .../command_queue/enqueue_kernel_2_tests.cpp | 12 +- .../get_size_required_buffer_tests.cpp | 75 +----- 
...and_stream_receiver_flush_task_3_tests.cpp | 4 +- ...and_stream_receiver_flush_task_4_tests.cpp | 171 +++---------- .../command_stream_receiver_hw_1_tests.cpp | 73 +++--- .../command_stream_receiver_hw_2_tests.cpp | 2 +- .../unit_test/event/event_builder_tests.cpp | 4 +- opencl/test/unit_test/event/event_tests.cpp | 14 +- opencl/test/unit_test/gtpin/gtpin_tests.cpp | 2 +- .../helpers/task_information_tests.cpp | 8 +- .../helpers/timestamp_packet_1_tests.cpp | 12 +- .../kernel_cache_flush_requirements_tests.cpp | 4 +- opencl/test/unit_test/kernel/kernel_tests.cpp | 1 - .../unit_test/mem_obj/buffer_bcs_tests.cpp | 6 +- .../test/unit_test/mocks/mock_command_queue.h | 1 - opencl/test/unit_test/mocks/mock_event.h | 1 - .../unit_test/profiling/profiling_tests.cpp | 16 +- .../command_stream_receiver_hw_tests_pvc.cpp | 20 +- .../command_stream/command_stream_receiver.h | 1 - .../command_stream_receiver_hw.h | 1 - .../command_stream_receiver_hw_base.inl | 18 +- shared/source/command_stream/csr_deps.h | 2 +- .../source/helpers/blit_commands_helper.cpp | 4 - shared/source/helpers/blit_commands_helper.h | 1 - .../helpers/blit_commands_helper_base.inl | 5 +- shared/source/helpers/timestamp_packet.h | 20 +- .../mocks/mock_command_stream_receiver.h | 2 - .../command_stream_receiver_tests.cpp | 90 ------- .../helpers/blit_commands_helper_tests.cpp | 23 -- .../helpers/timestamp_packet_tests.cpp | 32 --- 61 files changed, 209 insertions(+), 994 deletions(-) delete mode 100644 opencl/source/command_queue/command_queue_process_dispatch_for_kernels_instance.inl diff --git a/opencl/source/command_queue/CMakeLists.txt b/opencl/source/command_queue/CMakeLists.txt index a597140777..5e782f5c18 100644 --- a/opencl/source/command_queue/CMakeLists.txt +++ b/opencl/source/command_queue/CMakeLists.txt @@ -13,7 +13,6 @@ set(RUNTIME_SRCS_COMMAND_QUEUE ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_base.inl 
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_bdw_and_later.inl - ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_process_dispatch_for_kernels_instance.inl ${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_state.h ${CMAKE_CURRENT_SOURCE_DIR}/cpu_data_transfer_handler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/csr_selection_args.h diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h index 4e532c1a02..00a3205603 100644 --- a/opencl/source/command_queue/command_queue_hw.h +++ b/opencl/source/command_queue/command_queue_hw.h @@ -388,8 +388,7 @@ class CommandQueueHw : public CommandQueue { EventsRequest &eventsRequest, EventBuilder &externalEventBuilder, std::unique_ptr &&printfHandler, - CommandStreamReceiver *bcsCsr, - TagNodeBase *multiRootDeviceSyncNode); + CommandStreamReceiver *bcsCsr); CompletionStamp enqueueCommandWithoutKernel(Surface **surfaces, size_t surfaceCount, @@ -420,7 +419,7 @@ class CommandQueueHw : public CommandQueue { TimestampPacketDependencies &timestampPacketDependencies, const EventsRequest &eventsRequest, LinearStream *commandStream, - uint32_t commandType, bool queueBlocked, TagNodeBase *multiRootDeviceEventSync); + uint32_t commandType, bool queueBlocked); void submitCacheFlush(Surface **surfaces, size_t numSurfaces, LinearStream *commandStream, @@ -471,7 +470,7 @@ class CommandQueueHw : public CommandQueue { blockedCommandsData = std::make_unique(commandStream, *gpgpuCsr.getInternalAllocationStorage()); } else { commandStream = &getCommandStream(*this, csrDependencies, profilingRequired, perfCountersRequired, - blitEnqueue, multiDispatchInfo, surfaces, numSurfaces, isMarkerWithProfiling, eventsRequest.numEventsInWaitList > 0, eventsRequest.outEvent); + blitEnqueue, multiDispatchInfo, surfaces, numSurfaces, isMarkerWithProfiling, eventsRequest.numEventsInWaitList > 0); } return commandStream; } diff --git a/opencl/source/command_queue/command_queue_hw_xehp_and_later.inl
b/opencl/source/command_queue/command_queue_hw_xehp_and_later.inl index c9dd2c5124..33b2cadafa 100644 --- a/opencl/source/command_queue/command_queue_hw_xehp_and_later.inl +++ b/opencl/source/command_queue/command_queue_hw_xehp_and_later.inl @@ -45,7 +45,7 @@ bool CommandQueueHw::isCacheFlushCommand(uint32_t commandType) const { } template <> -LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo, Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList, cl_event *outEvent) { +LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo, Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList) { size_t expectedSizeCS = 0; [[maybe_unused]] bool usePostSync = false; if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { diff --git a/opencl/source/command_queue/command_queue_process_dispatch_for_kernels_instance.inl b/opencl/source/command_queue/command_queue_process_dispatch_for_kernels_instance.inl deleted file mode 100644 index 1d995e463e..0000000000 --- a/opencl/source/command_queue/command_queue_process_dispatch_for_kernels_instance.inl +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (C) 2022 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#pragma once -template void CommandQueueHw::processDispatchForKernels(const MultiDispatchInfo &multiDispatchInfo, - std::unique_ptr &printfHandler, - Event *event, - TagNodeBase *&hwTimeStamps, - bool blockQueue, - CsrDependencies &csrDeps, - KernelOperation *blockedCommandsData, - TimestampPacketDependencies &timestampPacketDependencies); \ No newline at end of file diff --git
a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 3ceb9ba3c0..320aac9f09 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -176,7 +176,7 @@ cl_int CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, BlitPropertiesContainer blitPropertiesContainer; if (this->context->getRootDeviceIndices().size() > 1) { - eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, computeCommandStreamReceiver); + eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, computeCommandStreamReceiver); } const bool enqueueWithBlitAuxTranslation = isBlitAuxTranslationRequired(multiDispatchInfo); @@ -225,7 +225,7 @@ cl_int CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, } if (this->context->getRootDeviceIndices().size() > 1) { - TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer(commandStream, csrDeps); + TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer(commandStream, csrDeps); } if (enqueueWithBlitAuxTranslation) { @@ -240,13 +240,6 @@ cl_int CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, } bool flushDependenciesForNonKernelCommand = false; - TagNodeBase *multiRootEventSyncStamp = nullptr; - if (eventBuilder.getEvent() && eventBuilder.getEvent()->getContext()->getRootDeviceIndices().size() > 1) { - multiRootEventSyncStamp = eventBuilder.getEvent()->getMultiRootTimestampSyncNode(); - if (!blockQueue) { - this->getGpgpuCommandStreamReceiver().makeResident(*multiRootEventSyncStamp->getBaseGraphicsAllocation()); - } - } if (multiDispatchInfo.empty() == false) { processDispatchForKernels(multiDispatchInfo, printfHandler, eventBuilder.getEvent(), @@ -388,8 +381,7 @@ cl_int CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, eventsRequest, eventBuilder, std::move(printfHandler), - nullptr, - multiRootEventSyncStamp); + nullptr); } if (deferredTimestampPackets.get()) { @@ -482,10 
+474,6 @@ void CommandQueueHw::processDispatchForKernels(const MultiDispatchInf dispatchWalkerArgs.commandType = commandType; dispatchWalkerArgs.event = event; - if (event && event->getMultiRootDeviceTimestampPacketNodes() && !event->getMultiRootDeviceTimestampPacketNodes()->peekNodes().empty()) { - dispatchWalkerArgs.multiRootDeviceEventStamp = event->getMultiRootDeviceTimestampPacketNodes()->peekNodes().at(0u); - } - HardwareInterface::dispatchWalker( *this, multiDispatchInfo, @@ -508,7 +496,7 @@ BlitProperties CommandQueueHw::processDispatchForBlitEnqueue(CommandS const MultiDispatchInfo &multiDispatchInfo, TimestampPacketDependencies &timestampPacketDependencies, const EventsRequest &eventsRequest, LinearStream *commandStream, - uint32_t commandType, bool queueBlocked, TagNodeBase *multiRootDeviceEventSync) { + uint32_t commandType, bool queueBlocked) { auto blitDirection = ClBlitProperties::obtainBlitDirection(commandType); auto blitProperties = ClBlitProperties::constructProperties(blitDirection, blitCommandStreamReceiver, @@ -521,7 +509,7 @@ BlitProperties CommandQueueHw::processDispatchForBlitEnqueue(CommandS blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes); blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.barrierNodes); } - blitProperties.multiRootDeviceEventSync = multiRootDeviceEventSync; + auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0); blitProperties.outputTimestampPacket = currentTimestampPacketNode; @@ -626,19 +614,6 @@ void CommandQueueHw::processDispatchForMarker(CommandQueue &commandQu HardwareInterface::dispatchProfilingPerfStartCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue); HardwareInterface::dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue); getGpgpuCommandStreamReceiver().makeResident(*hwTimeStamps->getBaseGraphicsAllocation()); - if
(event->getMultiRootDeviceTimestampPacketNodes() && !event->getMultiRootDeviceTimestampPacketNodes()->peekNodes().empty()) { - auto node = *(event->getMultiRootDeviceTimestampPacketNodes()->peekNodes().end() - 1); - const auto &hwInfo = commandQueue.getDevice().getHardwareInfo(); - NEO::PipeControlArgs args = {}; - args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); - MemorySynchronizationCommands::addBarrierWithPostSyncOperation( - *commandStream, - PostSyncMode::ImmediateData, - node->getGpuAddress() + node->getContextEndOffset(), - std::numeric_limits::max(), - hwInfo, - args); - } } template @@ -659,22 +634,6 @@ void CommandQueueHw::processDispatchForMarkerWithTimestampPacket(Comm EncodeStoreMMIO::encode(*commandStream, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timestampContextEndGpuAddress, false); EncodeStoreMMIO::encode(*commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalEndAddress, false); - if (eventsRequest.outEvent != nullptr) { - auto event = castToObjectOrAbort(*eventsRequest.outEvent); - if (event->getMultiRootDeviceTimestampPacketNodes() && !event->getMultiRootDeviceTimestampPacketNodes()->peekNodes().empty()) { - auto node = *(event->getMultiRootDeviceTimestampPacketNodes()->peekNodes().end() - 1); - const auto &hwInfo = commandQueue.getDevice().getHardwareInfo(); - NEO::PipeControlArgs args = {}; - args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); - MemorySynchronizationCommands::addBarrierWithPostSyncOperation( - *commandStream, - PostSyncMode::ImmediateData, - node->getGpuAddress() + node->getContextEndOffset(), - std::numeric_limits::max(), - hwInfo, - args); - } - } } template @@ -941,8 +900,7 @@ void CommandQueueHw::enqueueBlocked( EventsRequest &eventsRequest, EventBuilder &externalEventBuilder, std::unique_ptr &&printfHandler, - CommandStreamReceiver *bcsCsr, - TagNodeBase *multiRootDeviceSyncNode) { + CommandStreamReceiver *bcsCsr) { TakeOwnershipWrapper> 
queueOwnership(*this); @@ -1013,8 +971,7 @@ void CommandQueueHw::enqueueBlocked( std::move(printfHandler), preemptionMode, multiDispatchInfo.peekMainKernel(), - (uint32_t)multiDispatchInfo.size(), - multiRootDeviceSyncNode); + (uint32_t)multiDispatchInfo.size()); } if (storeTimestampPackets) { command->setTimestampPacketNode(*timestampPacketContainer, std::move(timestampPacketDependencies)); @@ -1316,14 +1273,10 @@ cl_int CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDisp } TimestampPacketDependencies timestampPacketDependencies; - TagNodeBase *multiRootEventSyncStamp = nullptr; BlitPropertiesContainer blitPropertiesContainer; CsrDependencies csrDeps; eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, bcsCsr, CsrDependencies::DependenciesType::All); - if (this->context->getRootDeviceIndices().size() > 1) { - eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, bcsCsr); - } auto allocator = bcsCsr.getTimestampPacketAllocator(); if (!blockQueue) { @@ -1346,10 +1299,6 @@ cl_int CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDisp if (eventBuilder.getEvent()) { eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer); } - if (eventBuilder.getEvent() && eventBuilder.getEvent()->getContext()->getRootDeviceIndices().size() > 1) { - multiRootEventSyncStamp = eventBuilder.getEvent()->getMultiRootTimestampSyncNode(); - this->getGpgpuCommandStreamReceiver().makeResident(*multiRootEventSyncStamp->getBaseGraphicsAllocation()); - } CompletionStamp completionStamp = {CompletionStamp::notReady, taskLevel, 0}; @@ -1366,7 +1315,7 @@ cl_int CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDisp } blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies, - eventsRequest, gpgpuCommandStream, cmdType, blockQueue, multiRootEventSyncStamp)); + eventsRequest, gpgpuCommandStream, cmdType, blockQueue)); if (!blockQueue) { completionStamp = 
enqueueCommandWithoutKernel(nullptr, 0, gpgpuCommandStream, gpgpuCommandStreamStart, blocking, @@ -1393,7 +1342,7 @@ cl_int CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDisp updateFromCompletionStamp(completionStamp, eventBuilder.getEvent()); if (blockQueue) { - enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, nullptr, &bcsCsr, multiRootEventSyncStamp); + enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, nullptr, &bcsCsr); if (gpgpuSubmission) { if (DebugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() == 1) { diff --git a/opencl/source/command_queue/gpgpu_walker.h b/opencl/source/command_queue/gpgpu_walker.h index 8e7dc560a6..f577631992 100644 --- a/opencl/source/command_queue/gpgpu_walker.h +++ b/opencl/source/command_queue/gpgpu_walker.h @@ -89,7 +89,7 @@ class GpgpuWalkerHelper { template struct EnqueueOperation { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - static size_t getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitList, cl_event *outEvent); + static size_t getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitList); static size_t getSizeRequiredCS(uint32_t cmdType, bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo); static size_t getSizeRequiredForTimestampPacketWrite(); static size_t 
getSizeForCacheFlushAfterWalkerCommands(const Kernel &kernel, const CommandQueue &commandQueue); @@ -102,8 +102,8 @@ struct EnqueueOperation { template LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo, - Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList, cl_event *outEvent) { - size_t expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(eventType, csrDeps, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, blitEnqueue, commandQueue, multiDispatchInfo, isMarkerWithProfiling, eventsInWaitList, outEvent); + Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling, bool eventsInWaitList) { + size_t expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(eventType, csrDeps, reserveProfilingCmdsSpace, reservePerfCounterCmdsSpace, blitEnqueue, commandQueue, multiDispatchInfo, isMarkerWithProfiling, eventsInWaitList); return commandQueue.getCS(expectedSizeCS); } diff --git a/opencl/source/command_queue/gpgpu_walker_base.inl b/opencl/source/command_queue/gpgpu_walker_base.inl index 98af358a86..b1dda219dd 100644 --- a/opencl/source/command_queue/gpgpu_walker_base.inl +++ b/opencl/source/command_queue/gpgpu_walker_base.inl @@ -165,7 +165,7 @@ size_t GpgpuWalkerHelper::getSizeForWaDisableRccRhwoOptimization(cons } template -size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitlist, cl_event *outEvent) { +size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounters, bool blitEnqueue, CommandQueue &commandQueue, const 
MultiDispatchInfo &multiDispatchInfo, bool isMarkerWithProfiling, bool eventsInWaitlist) { size_t expectedSizeCS = 0; auto &hwInfo = commandQueue.getDevice().getHardwareInfo(); auto &gfxCoreHelper = commandQueue.getDevice().getGfxCoreHelper(); @@ -216,14 +216,8 @@ size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, c if (DebugManager.flags.GpuScratchRegWriteAfterWalker.get() != -1) { expectedSizeCS += sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM); } - expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer(csrDeps); - if (outEvent) { - auto pEvent = castToObjectOrAbort(*outEvent); - if ((pEvent->getContext()->getRootDeviceIndices().size() > 1) && (!pEvent->isUserEvent())) { - expectedSizeCS += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo, false); - } - } - expectedSizeCS += MemorySynchronizationCommands::getSizeForSingleBarrier(false); + + expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(csrDeps); return expectedSizeCS; } diff --git a/opencl/source/command_queue/hardware_interface.h b/opencl/source/command_queue/hardware_interface.h index 81b2d90dff..a88e1c6d4f 100644 --- a/opencl/source/command_queue/hardware_interface.h +++ b/opencl/source/command_queue/hardware_interface.h @@ -31,7 +31,6 @@ struct HardwareInterfaceWalkerArgs { size_t localWorkSizes[3] = {}; TagNodeBase *hwTimeStamps = nullptr; TagNodeBase *hwPerfCounter = nullptr; - TagNodeBase *multiRootDeviceEventStamp = nullptr; TimestampPacketDependencies *timestampPacketDependencies = nullptr; TimestampPacketContainer *currentTimestampPacketNodes = nullptr; const Vec3 *numberOfWorkgroups = nullptr; diff --git a/opencl/source/command_queue/hardware_interface_base.inl b/opencl/source/command_queue/hardware_interface_base.inl index a50bf9a709..a514157406 100644 --- a/opencl/source/command_queue/hardware_interface_base.inl +++ 
b/opencl/source/command_queue/hardware_interface_base.inl @@ -133,7 +133,6 @@ void HardwareInterface::dispatchWalker( walkerArgs.currentTimestampPacketNodes); walkerArgs.currentDispatchIndex = 0; - for (auto &dispatchInfo : multiDispatchInfo) { dispatchInfo.dispatchInitCommands(*commandStream, walkerArgs.timestampPacketDependencies, commandQueue.getDevice().getHardwareInfo()); walkerArgs.isMainKernel = (dispatchInfo.getKernel() == mainKernel); @@ -144,19 +143,6 @@ void HardwareInterface::dispatchWalker( dispatchInfo.dispatchEpilogueCommands(*commandStream, walkerArgs.timestampPacketDependencies, commandQueue.getDevice().getHardwareInfo()); } - if (walkerArgs.multiRootDeviceEventStamp != nullptr) { - const auto &hwInfo = commandQueue.getDevice().getHardwareInfo(); - PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); - MemorySynchronizationCommands::addBarrierWithPostSyncOperation( - *commandStream, - PostSyncMode::ImmediateData, - walkerArgs.multiRootDeviceEventStamp->getGpuAddress() + walkerArgs.multiRootDeviceEventStamp->getContextEndOffset(), - std::numeric_limits::max(), - hwInfo, - args); - } - if (mainKernel->requiresCacheFlushCommand(commandQueue)) { uint64_t postSyncAddress = 0; if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp index f4a5328994..3ec69a2036 100644 --- a/opencl/source/context/context.cpp +++ b/opencl/source/context/context.cpp @@ -45,9 +45,7 @@ Context::Context( Context::~Context() { gtpinNotifyContextDestroy((cl_context)this); - if (multiRootDeviceTimestampPacketAllocator.get() != nullptr) { - multiRootDeviceTimestampPacketAllocator.reset(); - } + if (smallBufferPoolAllocator.isAggregatedSmallBuffersEnabled(this)) { smallBufferPoolAllocator.releaseSmallBufferPool(); } @@ -557,15 +555,5 @@ void Context::BufferPoolAllocator::releaseSmallBufferPool() { delete 
this->mainStorage; this->mainStorage = nullptr; } -TagAllocatorBase *Context::getMultiRootDeviceTimestampPacketAllocator() { - return multiRootDeviceTimestampPacketAllocator.get(); -} -void Context::setMultiRootDeviceTimestampPacketAllocator(std::unique_ptr &allocator) { - multiRootDeviceTimestampPacketAllocator = std::move(allocator); -} - -std::unique_lock Context::obtainOwnershipForMultiRootDeviceAllocator() { - return std::unique_lock(multiRootDeviceAllocatorMtx); -} } // namespace NEO diff --git a/opencl/source/context/context.h b/opencl/source/context/context.h index d6dd69d167..54d8cfcdc6 100644 --- a/opencl/source/context/context.h +++ b/opencl/source/context/context.h @@ -34,7 +34,6 @@ class SharingFunctions; class SVMAllocsManager; class Program; class Platform; -class TagAllocatorBase; template <> struct OpenCLObjectMapper<_cl_context> { @@ -221,9 +220,6 @@ class Context : public BaseObject<_cl_context> { BufferPoolAllocator &getBufferPoolAllocator() { return this->smallBufferPoolAllocator; } - TagAllocatorBase *getMultiRootDeviceTimestampPacketAllocator(); - std::unique_lock obtainOwnershipForMultiRootDeviceAllocator(); - void setMultiRootDeviceTimestampPacketAllocator(std::unique_ptr &allocator); protected: struct BuiltInKernel { @@ -264,8 +260,6 @@ class Context : public BaseObject<_cl_context> { uint32_t maxRootDeviceIndex = std::numeric_limits::max(); cl_bool preferD3dSharedResources = 0u; ContextType contextType = ContextType::CONTEXT_TYPE_DEFAULT; - std::unique_ptr multiRootDeviceTimestampPacketAllocator; - std::mutex multiRootDeviceAllocatorMtx; bool interopUserSync = false; bool resolvesRequiredInKernels = false; diff --git a/opencl/source/event/event.cpp b/opencl/source/event/event.cpp index 6a82308e9e..05e4d6fef0 100644 --- a/opencl/source/event/event.cpp +++ b/opencl/source/event/event.cpp @@ -129,9 +129,6 @@ Event::~Event() { if (timeStampNode != nullptr) { timeStampNode->returnTag(); } - if (multiRootTimeStampSyncNode != nullptr) { - 
multiRootTimeStampSyncNode->returnTag(); - } if (perfCounterNode != nullptr) { cmdQueue->getPerfCounters()->deleteQuery(perfCounterNode->getQueryHandleRef()); perfCounterNode->getQueryHandleRef() = {}; @@ -878,6 +875,7 @@ TagNodeBase *Event::getHwTimeStampNode() { } TagNodeBase *Event::getHwPerfCounterNode() { + if (!perfCounterNode && cmdQueue->getPerfCounters()) { const uint32_t gpuReportSize = HwPerfCounter::getSize(*(cmdQueue->getPerfCounters())); perfCounterNode = cmdQueue->getGpgpuCommandStreamReceiver().getEventPerfCountAllocator(gpuReportSize)->getTag(); @@ -885,27 +883,11 @@ TagNodeBase *Event::getHwPerfCounterNode() { return perfCounterNode; } -TagNodeBase *Event::getMultiRootTimestampSyncNode() { - auto lock = getContext()->obtainOwnershipForMultiRootDeviceAllocator(); - if (getContext()->getMultiRootDeviceTimestampPacketAllocator() == nullptr) { - auto allocator = cmdQueue->getGpgpuCommandStreamReceiver().createMultiRootDeviceTimestampPacketAllocator(getContext()->getRootDeviceIndices()); - getContext()->setMultiRootDeviceTimestampPacketAllocator(allocator); - } - lock.unlock(); - if (multiRootDeviceTimestampPacketContainer.get() == nullptr) { - multiRootDeviceTimestampPacketContainer = std::make_unique(); - } - multiRootTimeStampSyncNode = getContext()->getMultiRootDeviceTimestampPacketAllocator()->getTag(); - multiRootDeviceTimestampPacketContainer->add(multiRootTimeStampSyncNode); - return multiRootTimeStampSyncNode; -} - void Event::addTimestampPacketNodes(const TimestampPacketContainer &inputTimestampPacketContainer) { timestampPacketContainer->assignAndIncrementNodesRefCounts(inputTimestampPacketContainer); } TimestampPacketContainer *Event::getTimestampPacketNodes() const { return timestampPacketContainer.get(); } -TimestampPacketContainer *Event::getMultiRootDeviceTimestampPacketNodes() const { return multiRootDeviceTimestampPacketContainer.get(); } bool Event::checkUserEventDependencies(cl_uint numEventsInWaitList, const cl_event 
*eventWaitList) { bool userEventsDependencies = false; diff --git a/opencl/source/event/event.h b/opencl/source/event/event.h index 688f2ed516..7468754286 100644 --- a/opencl/source/event/event.h +++ b/opencl/source/event/event.h @@ -114,7 +114,6 @@ class Event : public BaseObject<_cl_event>, public IDNode { void addTimestampPacketNodes(const TimestampPacketContainer &inputTimestampPacketContainer); TimestampPacketContainer *getTimestampPacketNodes() const; - TimestampPacketContainer *getMultiRootDeviceTimestampPacketNodes() const; bool isPerfCountersEnabled() const { return perfCountersEnabled; @@ -129,7 +128,6 @@ class Event : public BaseObject<_cl_event>, public IDNode { } TagNodeBase *getHwPerfCounterNode(); - TagNodeBase *getMultiRootTimestampSyncNode(); std::unique_ptr flushStamp; std::atomic taskLevel; @@ -388,9 +386,7 @@ class Event : public BaseObject<_cl_event>, public IDNode { bool perfCountersEnabled; TagNodeBase *timeStampNode = nullptr; TagNodeBase *perfCounterNode = nullptr; - TagNodeBase *multiRootTimeStampSyncNode = nullptr; std::unique_ptr timestampPacketContainer; - std::unique_ptr multiRootDeviceTimestampPacketContainer; //number of events this event depends on std::atomic parentCount; //event parents diff --git a/opencl/source/gen11/command_queue_gen11.cpp b/opencl/source/gen11/command_queue_gen11.cpp index 2a6e012be0..c1efcf0cea 100644 --- a/opencl/source/gen11/command_queue_gen11.cpp +++ b/opencl/source/gen11/command_queue_gen11.cpp @@ -16,8 +16,8 @@ namespace NEO { typedef Gen11Family Family; -#include "opencl/source/command_queue/command_queue_process_dispatch_for_kernels_instance.inl" static auto gfxCore = IGFX_GEN11_CORE; + template class CommandQueueHw; template <> diff --git a/opencl/source/gen12lp/command_queue_gen12lp.cpp b/opencl/source/gen12lp/command_queue_gen12lp.cpp index 67cfa54a4e..7ad2d1f9e9 100644 --- a/opencl/source/gen12lp/command_queue_gen12lp.cpp +++ b/opencl/source/gen12lp/command_queue_gen12lp.cpp @@ -14,13 +14,16 @@ 
#include "command_queue_helpers_gen12lp.inl" namespace NEO { + typedef Gen12LpFamily Family; -#include "opencl/source/command_queue/command_queue_process_dispatch_for_kernels_instance.inl" static auto gfxCore = IGFX_GEN12LP_CORE; + template <> void populateFactoryTable>() { extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; commandQueueFactory[gfxCore] = CommandQueueHw::create; } + template class CommandQueueHw; + } // namespace NEO diff --git a/opencl/source/gen8/command_queue_gen8.cpp b/opencl/source/gen8/command_queue_gen8.cpp index 29a5613ae8..dc3fcae08e 100644 --- a/opencl/source/gen8/command_queue_gen8.cpp +++ b/opencl/source/gen8/command_queue_gen8.cpp @@ -16,8 +16,8 @@ namespace NEO { typedef Gen8Family Family; -#include "opencl/source/command_queue/command_queue_process_dispatch_for_kernels_instance.inl" static auto gfxCore = IGFX_GEN8_CORE; + template class CommandQueueHw; template <> diff --git a/opencl/source/gen9/command_queue_gen9.cpp b/opencl/source/gen9/command_queue_gen9.cpp index 2c3824fad7..8dddd508a9 100644 --- a/opencl/source/gen9/command_queue_gen9.cpp +++ b/opencl/source/gen9/command_queue_gen9.cpp @@ -16,10 +16,8 @@ namespace NEO { typedef Gen9Family Family; -#include "opencl/source/command_queue/command_queue_process_dispatch_for_kernels_instance.inl" -} // namespace NEO -namespace NEO { static auto gfxCore = IGFX_GEN9_CORE; + template class CommandQueueHw; template <> diff --git a/opencl/source/helpers/properties_helper.cpp b/opencl/source/helpers/properties_helper.cpp index 6cd42d1aec..eecd1b954a 100644 --- a/opencl/source/helpers/properties_helper.cpp +++ b/opencl/source/helpers/properties_helper.cpp @@ -20,6 +20,7 @@ namespace NEO { void flushDependentCsr(CommandStreamReceiver &dependentCsr, CsrDependencies &csrDeps) { auto csrOwnership = dependentCsr.obtainUniqueOwnership(); dependentCsr.updateTagFromWait(); + csrDeps.taskCountContainer.push_back({dependentCsr.peekTaskCount(), 
reinterpret_cast(dependentCsr.getTagAddress())}); } void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr, CsrDependencies::DependenciesType depsType) const { @@ -53,7 +54,6 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci if (productHelper.isDcFlushAllowed()) { if (!dependentCsr.isLatestTaskCountFlushed()) { flushDependentCsr(dependentCsr, csrDeps); - //csrDeps.taskCountContainer.push_back({dependentCsr.peekTaskCount(), reinterpret_cast(dependentCsr.getTagAddress())}); currentCsr.makeResident(*dependentCsr.getTagAllocation()); } } @@ -62,22 +62,23 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci } } -void EventsRequest::fillCsrDependenciesForRootDevices(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr) const { +void EventsRequest::fillCsrDependenciesForTaskCountContainer(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr) const { for (cl_uint i = 0; i < this->numEventsInWaitList; i++) { auto event = castToObjectOrAbort(this->eventWaitList[i]); if (event->isUserEvent() || CompletionStamp::notReady == event->peekTaskCount()) { continue; } + if (event->getCommandQueue() && event->getCommandQueue()->getDevice().getRootDeviceIndex() != currentCsr.getRootDeviceIndex()) { - auto timestampPacketContainer = event->getMultiRootDeviceTimestampPacketNodes(); - if (!timestampPacketContainer || timestampPacketContainer->peekNodes().empty()) { - continue; - } auto &dependentCsr = event->getCommandQueue()->getGpgpuCommandStreamReceiver(); if (!dependentCsr.isLatestTaskCountFlushed()) { flushDependentCsr(dependentCsr, csrDeps); + } else { + csrDeps.taskCountContainer.push_back({event->peekTaskCount(), reinterpret_cast(dependentCsr.getTagAddress())}); } - csrDeps.multiRootTimeStampSyncContainer.push_back(timestampPacketContainer); + + auto graphicsAllocation = 
event->getCommandQueue()->getGpgpuCommandStreamReceiver().getTagsMultiAllocation()->getGraphicsAllocation(currentCsr.getRootDeviceIndex()); + currentCsr.getResidencyAllocations().push_back(graphicsAllocation); } } } diff --git a/opencl/source/helpers/properties_helper.h b/opencl/source/helpers/properties_helper.h index 0448176dc9..ed9b60f423 100644 --- a/opencl/source/helpers/properties_helper.h +++ b/opencl/source/helpers/properties_helper.h @@ -25,7 +25,7 @@ struct EventsRequest { : numEventsInWaitList(numEventsInWaitList), eventWaitList(eventWaitList), outEvent(outEvent) {} void fillCsrDependenciesForTimestampPacketContainer(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr, CsrDependencies::DependenciesType depsType) const; - void fillCsrDependenciesForRootDevices(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr) const; + void fillCsrDependenciesForTaskCountContainer(CsrDependencies &csrDeps, CommandStreamReceiver ¤tCsr) const; void setupBcsCsrForOutputEvent(CommandStreamReceiver &bcsCsr) const; cl_uint numEventsInWaitList; diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index 5cf945a4cc..85da42fb82 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -117,11 +117,10 @@ CompletionStamp &CommandMapUnmap::submit(TaskCountType taskLevel, bool terminate CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector surfaces, bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr &&printfHandler, - PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount, - TagNodeBase *multiRootDeviceSyncNode) + PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount) : Command(commandQueue, kernelOperation), surfaces(std::move(surfaces)), flushDC(flushDC), slmUsed(usesSLM), commandType(commandType), printfHandler(std::move(printfHandler)), kernel(kernel), - 
kernelCount(kernelCount), preemptionMode(preemptionMode), multiRootDeviceSyncNode(multiRootDeviceSyncNode) { + kernelCount(kernelCount), preemptionMode(preemptionMode) { UNRECOVERABLE_IF(nullptr == this->kernel); kernel->incRefInternal(); } @@ -163,9 +162,6 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term printfHandler->makeResident(commandStreamReceiver); } makeTimestampPacketsResident(commandStreamReceiver); - if (multiRootDeviceSyncNode != nullptr) { - commandStreamReceiver.makeResident(*multiRootDeviceSyncNode->getBaseGraphicsAllocation()); - } if (kernelOperation->blitPropertiesContainer.size() > 0) { CsrDependencies csrDeps; @@ -217,7 +213,7 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term false); // hasRelaxedOrderingDependencies if (commandQueue.getContext().getRootDeviceIndices().size() > 1) { - eventsRequest.fillCsrDependenciesForRootDevices(dispatchFlags.csrDependencies, commandStreamReceiver); + eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver); } const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired(); @@ -310,7 +306,7 @@ TaskCountType CommandWithoutKernel::dispatchBlitOperation() { blitProperties.outputTimestampPacket = currentTimestampPacketNodes->peekNodes()[0]; if (commandQueue.getContext().getRootDeviceIndices().size() > 1) { - eventsRequest.fillCsrDependenciesForRootDevices(blitProperties.csrDependencies, *bcsCsr); + eventsRequest.fillCsrDependenciesForTaskCountContainer(blitProperties.csrDependencies, *bcsCsr); } const auto newTaskCount = bcsCsr->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice()); @@ -392,7 +388,7 @@ CompletionStamp &CommandWithoutKernel::submit(TaskCountType taskLevel, bool term false); // hasRelaxedOrderingDependencies if 
(commandQueue.getContext().getRootDeviceIndices().size() > 1) { - eventsRequest.fillCsrDependenciesForRootDevices(dispatchFlags.csrDependencies, commandStreamReceiver); + eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver); } const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired(); diff --git a/opencl/source/helpers/task_information.h b/opencl/source/helpers/task_information.h index 54cc137710..decbf2e112 100644 --- a/opencl/source/helpers/task_information.h +++ b/opencl/source/helpers/task_information.h @@ -131,7 +131,7 @@ class CommandComputeKernel : public Command { public: CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector surfaces, bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr &&printfHandler, - PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount, TagNodeBase *multiRootDeviceSyncNode); + PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount); ~CommandComputeKernel() override; @@ -150,7 +150,6 @@ class CommandComputeKernel : public Command { Kernel *kernel; uint32_t kernelCount; PreemptionMode preemptionMode; - TagNodeBase *multiRootDeviceSyncNode; }; class CommandWithoutKernel : public Command { diff --git a/opencl/source/xe_hp_core/command_queue_xe_hp_core.cpp b/opencl/source/xe_hp_core/command_queue_xe_hp_core.cpp index b89f8cff97..7c502707e5 100644 --- a/opencl/source/xe_hp_core/command_queue_xe_hp_core.cpp +++ b/opencl/source/xe_hp_core/command_queue_xe_hp_core.cpp @@ -26,6 +26,7 @@ void populateFactoryTable>() { extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; commandQueueFactory[gfxCore] = CommandQueueHw::create; } + } // namespace NEO template class NEO::CommandQueueHw; diff --git a/opencl/source/xe_hpc_core/command_queue_xe_hpc_core.cpp b/opencl/source/xe_hpc_core/command_queue_xe_hpc_core.cpp index 2e8499d09a..56cc5c5a63 100644 
--- a/opencl/source/xe_hpc_core/command_queue_xe_hpc_core.cpp +++ b/opencl/source/xe_hpc_core/command_queue_xe_hpc_core.cpp @@ -15,7 +15,6 @@ namespace NEO { using Family = XeHpcCoreFamily; -#include "opencl/source/command_queue/command_queue_process_dispatch_for_kernels_instance.inl" static auto gfxCore = IGFX_XE_HPC_CORE; } // namespace NEO diff --git a/opencl/source/xe_hpg_core/command_queue_xe_hpg_core.cpp b/opencl/source/xe_hpg_core/command_queue_xe_hpg_core.cpp index bf5f5b94a4..3f0647ce99 100644 --- a/opencl/source/xe_hpg_core/command_queue_xe_hpg_core.cpp +++ b/opencl/source/xe_hpg_core/command_queue_xe_hpg_core.cpp @@ -21,7 +21,6 @@ static auto gfxCore = IGFX_XE_HPG_CORE; #include "opencl/source/command_queue/command_queue_hw_xehp_and_later.inl" namespace NEO { -#include "opencl/source/command_queue/command_queue_process_dispatch_for_kernels_instance.inl" template <> void populateFactoryTable>() { extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp index 73a9778faa..782b71dfc9 100644 --- a/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp @@ -19,7 +19,6 @@ #include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/buffer_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" -#include "opencl/test/unit_test/helpers/cl_hw_parse.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" @@ -194,7 +193,6 @@ class MockCommandStreamReceiverWithFailingFlushBatchedSubmission : public MockCo template struct MockCommandQueueHwWithOverwrittenCsr : public CommandQueueHw { using CommandQueueHw::CommandQueueHw; - using 
CommandQueueHw::timestampPacketContainer; MockCommandStreamReceiverWithFailingFlushBatchedSubmission *csr; CommandStreamReceiver &getGpgpuCommandStreamReceiver() const override { return *csr; } }; @@ -220,243 +218,6 @@ HWTEST_F(CommandQueueHwTest, GivenCommandQueueWhenProcessDispatchForMarkerCalled EXPECT_GT(csr.makeResidentCalledTimes, 0u); } -HWTEST_F(CommandQueueHwTest, GivenEventWithRootDeviceSyncNodesWhenProcessDispatchForMarkerCalledThenEndNodeWillBeSignaledByPipeControl) { - - pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; - MockCommandStreamReceiverWithFailingFlushBatchedSubmission csr(*pDevice->getExecutionEnvironment(), 0, pDevice->getDeviceBitfield()); - auto mockTagAllocator = std::make_unique>(pCmdQ->getContextPtr()->getRootDeviceIndices(), pDevice->getExecutionEnvironment()->memoryManager.get(), 10u); - std::unique_ptr uniquePtr(mockTagAllocator.release()); - pCmdQ->getContextPtr()->setMultiRootDeviceTimestampPacketAllocator(uniquePtr); - auto myCmdQ = std::make_unique>(pCmdQ->getContextPtr(), pClDevice, nullptr, false); - myCmdQ->csr = &csr; - csr.osContext = &pCmdQ->getGpgpuCommandStreamReceiver().getOsContext(); - std::unique_ptr event(new Event(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); - event->getMultiRootTimestampSyncNode(); - event->getMultiRootTimestampSyncNode(); - auto lastNode = event->getMultiRootTimestampSyncNode(); - ASSERT_NE(nullptr, event); - - cl_event clEvent = event.get(); - EventsRequest eventsRequest(0, nullptr, &clEvent); - uint32_t streamBuffer[100] = {}; - NEO::LinearStream linearStream(streamBuffer, sizeof(streamBuffer)); - CsrDependencies deps = {}; - myCmdQ->processDispatchForMarker(*myCmdQ.get(), &linearStream, eventsRequest, deps); - - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( - cmdList, ptrOffset(linearStream.getCpuBase(), 0), linearStream.getUsed())); - auto itor = find(cmdList.begin(), cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - bool 
expectedAddressInPipeControl = false; - while (itor != cmdList.end()) { - auto pipeControlCmd = reinterpret_cast(*itor); - uint64_t addressHigh = pipeControlCmd->getAddressHigh(); - uint64_t addressLow = pipeControlCmd->getAddress(); - addressHigh = addressHigh << 32; - uint64_t address = addressHigh | addressLow; - if (address == lastNode->getGpuAddress() + lastNode->getContextEndOffset()) { - expectedAddressInPipeControl = true; - break; - } - itor = find(++itor, cmdList.end()); - } - EXPECT_TRUE(expectedAddressInPipeControl); -} - -HWTEST_F(CommandQueueHwTest, GivenEventWithEmptyRootDeviceSyncNodesContainerWhenProcessDispatchForMarkerCalledThenSyncPipeControlIsNotProgrammed) { - - pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; - MockCommandStreamReceiverWithFailingFlushBatchedSubmission csr(*pDevice->getExecutionEnvironment(), 0, pDevice->getDeviceBitfield()); - auto mockTagAllocator = std::make_unique>(pCmdQ->getContextPtr()->getRootDeviceIndices(), pDevice->getExecutionEnvironment()->memoryManager.get(), 10u); - std::unique_ptr uniquePtr(mockTagAllocator.release()); - pCmdQ->getContextPtr()->setMultiRootDeviceTimestampPacketAllocator(uniquePtr); - auto myCmdQ = std::make_unique>(pCmdQ->getContextPtr(), pClDevice, nullptr, false); - myCmdQ->csr = &csr; - csr.osContext = &pCmdQ->getGpgpuCommandStreamReceiver().getOsContext(); - std::unique_ptr> event(new MockEvent(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); - auto node1 = event->getMultiRootTimestampSyncNode(); - auto node2 = event->getMultiRootTimestampSyncNode(); - auto node3 = event->getMultiRootTimestampSyncNode(); - event->multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer()); - ASSERT_NE(nullptr, event); - - cl_event clEvent = event.get(); - EventsRequest eventsRequest(0, nullptr, &clEvent); - uint32_t streamBuffer[100] = {}; - NEO::LinearStream linearStream(streamBuffer, sizeof(streamBuffer)); - CsrDependencies deps = {}; - 
myCmdQ->processDispatchForMarker(*myCmdQ.get(), &linearStream, eventsRequest, deps); - - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( - cmdList, ptrOffset(linearStream.getCpuBase(), 0), linearStream.getUsed())); - auto itor = find(cmdList.begin(), cmdList.end()); - bool notExpectedAddressInPipeControl = true; - while (itor != cmdList.end()) { - auto pipeControlCmd = reinterpret_cast(*itor); - uint64_t addressHigh = pipeControlCmd->getAddressHigh(); - uint64_t addressLow = pipeControlCmd->getAddress(); - addressHigh = addressHigh << 32; - uint64_t address = addressHigh | addressLow; - if (address == node1->getGpuAddress() + node1->getContextEndOffset() || - address == node2->getGpuAddress() + node2->getContextEndOffset() || - address == node3->getGpuAddress() + node3->getContextEndOffset()) { - notExpectedAddressInPipeControl = false; - break; - } - itor = find(++itor, cmdList.end()); - } - EXPECT_TRUE(notExpectedAddressInPipeControl); -} - -HWTEST_F(CommandQueueHwTest, GivenEventWithRootDeviceSyncNodesWhenProcessDispatchForMarkerWithTimestampPacketCalledThenEndNodeWillBeSignaledByPipeControl) { - - pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; - MockCommandStreamReceiverWithFailingFlushBatchedSubmission csr(*pDevice->getExecutionEnvironment(), 0, pDevice->getDeviceBitfield()); - auto mockTagAllocator = std::make_unique>(pCmdQ->getContextPtr()->getRootDeviceIndices(), pDevice->getExecutionEnvironment()->memoryManager.get(), 10u); - std::unique_ptr uniquePtr(mockTagAllocator.release()); - pCmdQ->getContextPtr()->setMultiRootDeviceTimestampPacketAllocator(uniquePtr); - auto myCmdQ = std::make_unique>(pCmdQ->getContextPtr(), pClDevice, nullptr, false); - myCmdQ->csr = &csr; - - myCmdQ->timestampPacketContainer = std::make_unique(); - myCmdQ->timestampPacketContainer->add(pCmdQ->getContextPtr()->getMultiRootDeviceTimestampPacketAllocator()->getTag()); - - csr.osContext = 
&pCmdQ->getGpgpuCommandStreamReceiver().getOsContext(); - std::unique_ptr event(new Event(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); - event->getMultiRootTimestampSyncNode(); - event->getMultiRootTimestampSyncNode(); - auto lastNode = event->getMultiRootTimestampSyncNode(); - ASSERT_NE(nullptr, event); - - cl_event clEvent = event.get(); - EventsRequest eventsRequest(0, nullptr, &clEvent); - uint32_t streamBuffer[100] = {}; - NEO::LinearStream linearStream(streamBuffer, sizeof(streamBuffer)); - CsrDependencies deps = {}; - myCmdQ->processDispatchForMarkerWithTimestampPacket(*myCmdQ.get(), &linearStream, eventsRequest, deps); - - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( - cmdList, ptrOffset(linearStream.getCpuBase(), 0), linearStream.getUsed())); - auto itor = find(cmdList.begin(), cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - - bool expectedAddressInPipeControl = false; - while (itor != cmdList.end()) { - auto pipeControlCmd = reinterpret_cast(*itor); - uint64_t addressHigh = pipeControlCmd->getAddressHigh(); - uint64_t addressLow = pipeControlCmd->getAddress(); - addressHigh = addressHigh << 32; - uint64_t address = addressHigh | addressLow; - if (address == lastNode->getGpuAddress() + lastNode->getContextEndOffset()) { - expectedAddressInPipeControl = true; - break; - } - itor = find(++itor, cmdList.end()); - } - EXPECT_TRUE(expectedAddressInPipeControl); -} - -HWTEST_F(CommandQueueHwTest, GivenEventWithEmptyRootDeviceSyncNodesContainerWhenProcessDispatchForMarkerWithTimestampPacketCalledThenSyncPipeControlIsNotProgrammed) { - - pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; - MockCommandStreamReceiverWithFailingFlushBatchedSubmission csr(*pDevice->getExecutionEnvironment(), 0, pDevice->getDeviceBitfield()); - auto mockTagAllocator = std::make_unique>(pCmdQ->getContextPtr()->getRootDeviceIndices(), pDevice->getExecutionEnvironment()->memoryManager.get(), 10u); - std::unique_ptr 
uniquePtr(mockTagAllocator.release()); - pCmdQ->getContextPtr()->setMultiRootDeviceTimestampPacketAllocator(uniquePtr); - auto myCmdQ = std::make_unique>(pCmdQ->getContextPtr(), pClDevice, nullptr, false); - myCmdQ->csr = &csr; - - myCmdQ->timestampPacketContainer = std::make_unique(); - myCmdQ->timestampPacketContainer->add(pCmdQ->getContextPtr()->getMultiRootDeviceTimestampPacketAllocator()->getTag()); - - csr.osContext = &pCmdQ->getGpgpuCommandStreamReceiver().getOsContext(); - std::unique_ptr> event(new MockEvent(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); - auto node1 = event->getMultiRootTimestampSyncNode(); - auto node2 = event->getMultiRootTimestampSyncNode(); - auto node3 = event->getMultiRootTimestampSyncNode(); - event->multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer()); - ASSERT_NE(nullptr, event); - - cl_event clEvent = event.get(); - EventsRequest eventsRequest(0, nullptr, &clEvent); - uint32_t streamBuffer[100] = {}; - NEO::LinearStream linearStream(streamBuffer, sizeof(streamBuffer)); - CsrDependencies deps = {}; - myCmdQ->processDispatchForMarkerWithTimestampPacket(*myCmdQ.get(), &linearStream, eventsRequest, deps); - - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( - cmdList, ptrOffset(linearStream.getCpuBase(), 0), linearStream.getUsed())); - auto itor = find(cmdList.begin(), cmdList.end()); - bool notExpectedAddressInPipeControl = true; - while (itor != cmdList.end()) { - auto pipeControlCmd = reinterpret_cast(*itor); - uint64_t addressHigh = pipeControlCmd->getAddressHigh(); - uint64_t addressLow = pipeControlCmd->getAddress(); - addressHigh = addressHigh << 32; - uint64_t address = addressHigh | addressLow; - if (address == node1->getGpuAddress() + node1->getContextEndOffset() || - address == node2->getGpuAddress() + node2->getContextEndOffset() || - address == node3->getGpuAddress() + node3->getContextEndOffset()) { - notExpectedAddressInPipeControl = false; - break; - } - itor = 
find(++itor, cmdList.end()); - } - EXPECT_TRUE(notExpectedAddressInPipeControl); -} - -HWTEST_F(CommandQueueHwTest, GivenEventRequestWithoutOutEventWhenProcessDispatchForMarkerWithTimestampPacketCalledThenSyncPipeControlIsNotProgrammed) { - - pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; - MockCommandStreamReceiverWithFailingFlushBatchedSubmission csr(*pDevice->getExecutionEnvironment(), 0, pDevice->getDeviceBitfield()); - auto mockTagAllocator = std::make_unique>(pCmdQ->getContextPtr()->getRootDeviceIndices(), pDevice->getExecutionEnvironment()->memoryManager.get(), 10u); - std::unique_ptr uniquePtr(mockTagAllocator.release()); - pCmdQ->getContextPtr()->setMultiRootDeviceTimestampPacketAllocator(uniquePtr); - auto myCmdQ = std::make_unique>(pCmdQ->getContextPtr(), pClDevice, nullptr, false); - myCmdQ->csr = &csr; - - myCmdQ->timestampPacketContainer = std::make_unique(); - myCmdQ->timestampPacketContainer->add(pCmdQ->getContextPtr()->getMultiRootDeviceTimestampPacketAllocator()->getTag()); - - csr.osContext = &pCmdQ->getGpgpuCommandStreamReceiver().getOsContext(); - std::unique_ptr> event(new MockEvent(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0)); - auto node1 = event->getMultiRootTimestampSyncNode(); - auto node2 = event->getMultiRootTimestampSyncNode(); - auto node3 = event->getMultiRootTimestampSyncNode(); - ASSERT_NE(nullptr, event); - - EventsRequest eventsRequest(0, nullptr, nullptr); - uint32_t streamBuffer[100] = {}; - NEO::LinearStream linearStream(streamBuffer, sizeof(streamBuffer)); - CsrDependencies deps = {}; - myCmdQ->processDispatchForMarkerWithTimestampPacket(*myCmdQ.get(), &linearStream, eventsRequest, deps); - - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( - cmdList, ptrOffset(linearStream.getCpuBase(), 0), linearStream.getUsed())); - auto itor = find(cmdList.begin(), cmdList.end()); - bool notExpectedAddressInPipeControl = true; - while (itor != cmdList.end()) { - auto pipeControlCmd = 
reinterpret_cast(*itor); - uint64_t addressHigh = pipeControlCmd->getAddressHigh(); - uint64_t addressLow = pipeControlCmd->getAddress(); - addressHigh = addressHigh << 32; - uint64_t address = addressHigh | addressLow; - if (address == node1->getGpuAddress() + node1->getContextEndOffset() || - address == node2->getGpuAddress() + node2->getContextEndOffset() || - address == node3->getGpuAddress() + node3->getContextEndOffset()) { - notExpectedAddressInPipeControl = false; - break; - } - itor = find(++itor, cmdList.end()); - } - EXPECT_TRUE(notExpectedAddressInPipeControl); -} - HWTEST_F(CommandQueueHwTest, GivenCommandQueueWhenItIsCreatedThenInitDirectSubmissionIsCalledOnAllBcsEngines) { MockCommandQueueHw queue(pContext, pClDevice, nullptr); for (auto engine : queue.bcsEngines) { diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp index ecdb73218b..61820e0fdd 100644 --- a/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp @@ -20,7 +20,6 @@ #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" -#include "opencl/test/unit_test/mocks/mock_mdi.h" using namespace NEO; @@ -277,7 +276,7 @@ HWTEST_F(OOQueueHwTest, givenBlockedOutOfOrderCmdQueueAndAsynchronouslyCompleted cmdQHw->taskLevel = 23; cmdQHw->enqueueKernel(mockKernel, 1, &offset, &size, &size, 1, &blockedEvent, nullptr); - // new virtual event is created on enqueue, bind it to the created virtual event + //new virtual event is created on enqueue, bind it to the created virtual event EXPECT_NE(cmdQHw->virtualEvent, &virtualEvent); event.setStatus(CL_SUBMITTED); @@ -286,7 +285,7 @@ HWTEST_F(OOQueueHwTest, givenBlockedOutOfOrderCmdQueueAndAsynchronouslyCompleted EXPECT_FALSE(cmdQHw->isQueueBlocked()); //+1 due to dependency between 
virtual event & new virtual event - // new virtual event is actually responsible for command delivery + //new virtual event is actually responsible for command delivery EXPECT_EQ(virtualEventTaskLevel + 1, cmdQHw->taskLevel); EXPECT_EQ(virtualEventTaskLevel + 1, mockCSR->lastTaskLevelToFlushTask); } @@ -971,90 +970,3 @@ HWTEST_F(CommandQueueHwTest, GivenBuiltinKernelWhenBuiltinDispatchInfoBuilderIsP EXPECT_EQ(builder.paramsToUse.offset.x, dispatchInfo->getOffset().x); EXPECT_EQ(builder.paramsToUse.kernel, dispatchInfo->getKernel()); } -HWTEST_F(CommandQueueHwTest, GivenMultiRootDeviceSyncEventWhenProcessDispatchForKernelsThenSyncNodeSignaledByPipeControll) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - MockDefaultContext context{true}; - std::unique_ptr pCmdQ1(createCommandQueue(context.getDevice(0), nullptr, &context)); - CommandQueueHw *cmdQHw = static_cast *>(pCmdQ1.get()); - MockKernelWithInternals mockKernelWithInternals(*context.getDevice(0), &context); - - MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({mockKernelWithInternals.mockKernel})); - std::unique_ptr printfHandler; - std::unique_ptr hwTimeStamps = std::make_unique>>(); - TagNodeBase *hwTimeStampsPtr = hwTimeStamps.get(); - bool blockQueue = false; - CsrDependencies csrDeps = {}; - KernelOperation *blockedCommandsData = nullptr; - TimestampPacketDependencies timestampPacketDependencies = {}; - std::unique_ptr> event(new MockEvent(cmdQHw, CL_COMMAND_COPY_BUFFER, 0, 0)); - auto node = event->getMultiRootTimestampSyncNode(); - reinterpret_cast *>(cmdQHw)->timestampPacketContainer.reset(); - reinterpret_cast *>(cmdQHw)->template processDispatchForKernels(multiDispatchInfo, - printfHandler, - event.get(), - hwTimeStampsPtr, - blockQueue, - csrDeps, - blockedCommandsData, - timestampPacketDependencies); - - HardwareParse ccsHwParser; - ccsHwParser.parseCommands(cmdQHw->getCS(0), 0u); - - auto pipeControlItor = find(ccsHwParser.cmdList.begin(), ccsHwParser.cmdList.end()); - 
bool expectedAddressInPipeControl = false; - while (pipeControlItor != ccsHwParser.cmdList.end()) { - auto pipeControlCmd = reinterpret_cast(*pipeControlItor); - uint64_t addressHigh = pipeControlCmd->getAddressHigh(); - uint64_t addressLow = pipeControlCmd->getAddress(); - addressHigh = addressHigh << 32; - uint64_t address = addressHigh | addressLow; - if (address == node->getGpuAddress() + node->getContextEndOffset()) { - expectedAddressInPipeControl = true; - break; - } - pipeControlItor = find(++pipeControlItor, ccsHwParser.cmdList.end()); - } - EXPECT_TRUE(expectedAddressInPipeControl); -} -HWTEST_F(CommandQueueHwTest, GivenMultiRootDeviceSyncEventWithEmptyDeviceSyncContainerWhenProcessDispatchForKernelsThenSyncNodeNotSignalled) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - MockDefaultContext context{true}; - std::unique_ptr pCmdQ1(createCommandQueue(context.getDevice(0), nullptr, &context)); - CommandQueueHw *cmdQHw = static_cast *>(pCmdQ1.get()); - MockKernelWithInternals mockKernelWithInternals(*context.getDevice(0), &context); - - MockMultiDispatchInfo multiDispatchInfo(pClDevice, std::vector({mockKernelWithInternals.mockKernel})); - std::unique_ptr printfHandler; - std::unique_ptr hwTimeStamps = std::make_unique>>(); - TagNodeBase *hwTimeStampsPtr = hwTimeStamps.get(); - bool blockQueue = false; - CsrDependencies csrDeps = {}; - KernelOperation *blockedCommandsData = nullptr; - TimestampPacketDependencies timestampPacketDependencies = {}; - std::unique_ptr> event(new MockEvent(cmdQHw, CL_COMMAND_COPY_BUFFER, 0, 0)); - auto node = event->getMultiRootTimestampSyncNode(); - node->incRefCount(); - event->multiRootDeviceTimestampPacketContainer = std::make_unique(); - reinterpret_cast *>(cmdQHw)->timestampPacketContainer.reset(); - reinterpret_cast *>(cmdQHw)->template processDispatchForKernels(multiDispatchInfo, printfHandler, event.get(), hwTimeStampsPtr, blockQueue, csrDeps, blockedCommandsData, timestampPacketDependencies); - HardwareParse 
ccsHwParser; - ccsHwParser.parseCommands(cmdQHw->getCS(0), 0u); - - auto pipeControlItor = find(ccsHwParser.cmdList.begin(), ccsHwParser.cmdList.end()); - bool expectedAddressInPipeControl = false; - while (pipeControlItor != ccsHwParser.cmdList.end()) { - auto pipeControlCmd = reinterpret_cast(*pipeControlItor); - uint64_t addressHigh = pipeControlCmd->getAddressHigh(); - uint64_t addressLow = pipeControlCmd->getAddress(); - addressHigh = addressHigh << 32; - uint64_t address = addressHigh | addressLow; - if (address == node->getGpuAddress() + node->getContextEndOffset()) { - expectedAddressInPipeControl = true; - break; - } - pipeControlItor = find(++pipeControlItor, ccsHwParser.cmdList.end()); - } - EXPECT_FALSE(expectedAddressInPipeControl); - node->returnTag(); -} \ No newline at end of file diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp index 1e0f618e58..962e2a4a5a 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp @@ -29,7 +29,6 @@ #include "opencl/test/unit_test/fixtures/cl_device_fixture.h" #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" -#include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_mdi.h" #include "opencl/test/unit_test/mocks/mock_program.h" diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp index e080f6d16a..10d0e767c2 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp @@ -557,11 +557,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenTimestamp 
MockMultiDispatchInfo multiDispatchInfo(device.get(), std::vector({kernel1.mockKernel, kernel2.mockKernel})); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; - getCommandStream(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr); + getCommandStream(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false); size_t sizeWithDisabled = cmdQ.requestedCmdStreamSize; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; - getCommandStream(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr); + getCommandStream(cmdQ, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false); size_t sizeWithEnabled = cmdQ.requestedCmdStreamSize; size_t additionalSize = 0u; @@ -669,7 +669,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenAutoLocal EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer()); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, CsrDependencies(), false, false, - false, *cmdQ.get(), multiDispatchInfo, false, false, nullptr); + false, *cmdQ.get(), multiDispatchInfo, false, false); expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); @@ -738,7 +738,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenPassInlin EXPECT_EQ((uint32_t)(expectedKernelStartOffset), idd.getKernelStartPointer()); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, CsrDependencies(), false, false, - false, *cmdQ.get(), multiDispatchInfo, false, false, nullptr); + false, *cmdQ.get(), multiDispatchInfo, false, false); expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedSizeCS = 
alignUp(expectedSizeCS, MemoryConstants::cacheLineSize); EXPECT_GE(expectedSizeCS, usedAfterCS - usedBeforeCS); diff --git a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp index ab44c67049..9bbee21f1b 100644 --- a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp @@ -233,7 +233,7 @@ HWTEST_F(EnqueueHandlerTest, givenNonBlitPropertyWhenEnqueueIsBlockedThenDontReg Surface *surfaces[] = {nullptr}; mockCmdQ->enqueueBlocked(CL_COMMAND_MARKER, surfaces, size_t(0), multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueuePropertiesForDependencyFlush, eventsRequest, - eventBuilder, std::unique_ptr(nullptr), nullptr, nullptr); + eventBuilder, std::unique_ptr(nullptr), nullptr); EXPECT_FALSE(blockedCommandsDataForDependencyFlush->blitEnqueue); } @@ -266,7 +266,7 @@ HWTEST_F(EnqueueHandlerTest, givenBlitPropertyWhenEnqueueIsBlockedThenRegisterBl Surface *surfaces[] = {nullptr}; mockCmdQ->enqueueBlocked(CL_COMMAND_READ_BUFFER, surfaces, size_t(0), multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueuePropertiesForBlitEnqueue, eventsRequest, - eventBuilder, std::unique_ptr(nullptr), mockCmdQ->getBcsForAuxTranslation(), nullptr); + eventBuilder, std::unique_ptr(nullptr), mockCmdQ->getBcsForAuxTranslation()); EXPECT_TRUE(blockedCommandsDataForBlitEnqueue->blitEnqueue); EXPECT_EQ(blitProperties.srcAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->srcAllocation); EXPECT_EQ(blitProperties.dstAllocation, blockedCommandsDataForBlitEnqueue->blitPropertiesContainer.begin()->dstAllocation); @@ -350,7 +350,7 @@ HWTEST_F(DispatchFlagsBlitTests, givenBlitEnqueueWhenDispatchingCommandsWithoutK 
timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag()); BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies, - eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false, nullptr); + eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); @@ -394,7 +394,7 @@ HWTEST_F(DispatchFlagsBlitTests, givenN1EnabledWhenDispatchingWithoutKernelThenA mockCmdQ->obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, true, bcsCsr); timestampPacketDependencies.cacheFlushNodes.add(mockCmdQ->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator()->getTag()); BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies, - eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false, nullptr); + eventsRequest, &mockCmdQ->getCS(0), CL_COMMAND_READ_BUFFER, false); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); @@ -440,7 +440,7 @@ HWTEST_F(DispatchFlagsTests, givenMockKernelWhenSettingAdditionalKernelExecInfoT std::vector v; pKernel->setAdditionalKernelExecInfo(123u); - std::unique_ptr cmd(new CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1, nullptr)); + std::unique_ptr cmd(new CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1)); cmd->submit(1u, false); EXPECT_EQ(mockCsr->passedDispatchFlags.additionalKernelExecInfo, 123u); diff --git a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests_dg2_and_later.cpp 
b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests_dg2_and_later.cpp index d794aa5338..07d5dc7321 100644 --- a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests_dg2_and_later.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests_dg2_and_later.cpp @@ -53,7 +53,7 @@ HWTEST2_F(DispatchFlagsTests, whenSubmittingKernelWithAdditionalKernelExecInfoTh std::vector v; pKernel->setAdditionalKernelExecInfo(AdditionalKernelExecInfo::DisableOverdispatch); - std::unique_ptr cmd(new CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1, nullptr)); + std::unique_ptr cmd(new CommandComputeKernel(*mockCmdQ.get(), blockedCommandsData, v, false, false, false, std::move(printfHandler), PreemptionMode::Disabled, pKernel, 1)); cmd->submit(1u, false); EXPECT_EQ(mockCsr->passedDispatchFlags.additionalKernelExecInfo, AdditionalKernelExecInfo::DisableOverdispatch); diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index aed5c4b56e..e4e53386b4 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -2002,10 +2002,10 @@ HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenEstimatingCommandStream dispatchInfo.setKernel(mockKernel.mockKernel); multiDispatchInfo.push(dispatchInfo); - auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false); DebugManager.flags.GpuScratchRegWriteAfterWalker.set(1); - auto extendedCommandStreamSize = 
EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false); EXPECT_EQ(baseCommandStreamSize + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM), extendedCommandStreamSize); } diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 57ff16afe5..b3182664df 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -1010,8 +1010,8 @@ HWTEST_F(EnqueueKernelTest, givenTimestampWriteEnableWhenMarkerProfilingWithoutW dispatchInfo.setKernel(mockKernel.mockKernel); multiDispatchInfo.push(dispatchInfo); - auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr); - auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false, nullptr); + auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false); + auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false); EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO::size + MemorySynchronizationCommands::getSizeForSingleBarrier(false), extendedCommandStreamSize); } @@ -1029,8 +1029,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueKernelTest, givenTimestampWriteEnableOnMulti dispatchInfo.setKernel(mockKernel.mockKernel); multiDispatchInfo.push(dispatchInfo); - auto baseCommandStreamSize = 
EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr); - auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false, nullptr); + auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false); + auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false); EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO::size + ImplicitScalingDispatch::getBarrierSize(csr.peekHwInfo(), false, false), extendedCommandStreamSize); } @@ -1043,8 +1043,8 @@ HWTEST_F(EnqueueKernelTest, givenTimestampWriteEnableWhenMarkerProfilingWithWait dispatchInfo.setKernel(mockKernel.mockKernel); multiDispatchInfo.push(dispatchInfo); - auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr); - auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, true, nullptr); + auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false); + auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, true); EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO::size, extendedCommandStreamSize); } diff --git a/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp b/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp index a7c9169e62..68e1038d82 100644 --- a/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp +++ 
b/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp @@ -23,7 +23,6 @@ #include "opencl/test/unit_test/fixtures/hello_world_kernel_fixture.h" #include "opencl/test/unit_test/fixtures/image_fixture.h" #include "opencl/test/unit_test/fixtures/simple_arg_kernel_fixture.h" -#include "opencl/test/unit_test/mocks/mock_event.h" using namespace NEO; @@ -97,7 +96,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenFillingBufferThenHeapsAndCommandBufferCo auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_FILL_BUFFER, CsrDependencies(), false, false, - false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + false, *pCmdQ, multiDispatchInfo, false, false); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); @@ -150,7 +149,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenCopyingBufferThenHeapsAndCommandBufferCo auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_COPY_BUFFER, CsrDependencies(), false, false, - false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + false, *pCmdQ, multiDispatchInfo, false, false); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); @@ -204,7 +203,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenReadingBufferNonBlockingThenHeapsAndComm auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, CsrDependencies(), false, false, - false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + false, *pCmdQ, multiDispatchInfo, 
false, false); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); @@ -259,7 +258,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenReadingBufferBlockingThenThenHeapsAndCom auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, CsrDependencies(), false, false, - false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + false, *pCmdQ, multiDispatchInfo, false, false); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); @@ -314,7 +313,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferNonBlockingThenHeapsAndComm auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, CsrDependencies(), false, false, - false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + false, *pCmdQ, multiDispatchInfo, false, false); auto expectedSizeDSH = HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); @@ -366,7 +365,7 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferBlockingThenHeapsAndCommand auto usedAfterSSH = ssh.getUsed(); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, CsrDependencies(), false, false, - false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + false, *pCmdQ, multiDispatchInfo, false, false); auto expectedSizeDSH = 
HardwareCommandsHelper::getTotalSizeRequiredDSH(multiDispatchInfo); auto expectedSizeIOH = HardwareCommandsHelper::getTotalSizeRequiredIOH(multiDispatchInfo); auto expectedSizeSSH = HardwareCommandsHelper::getTotalSizeRequiredSSH(multiDispatchInfo); @@ -381,68 +380,6 @@ HWTEST_F(GetSizeRequiredBufferTest, WhenWritingBufferBlockingThenHeapsAndCommand EXPECT_GE(expectedSizeSSH, usedAfterSSH - usedBeforeSSH); } -HWTEST_F(GetSizeRequiredBufferTest, GivenOutEventForSingleDeviceContextWhenCalculatingCSSizeThenExtraPipeControlIsNotAdded) { - UltClDeviceFactory deviceFactory{1, 0}; - DebugManager.flags.EnableMultiRootDeviceContexts.set(true); - - cl_device_id devices[] = {deviceFactory.rootDevices[0]}; - - MockContext pContext(ClDeviceVector(devices, 1)); - MockKernelWithInternals mockKernel(*pContext.getDevices()[0]); - DispatchInfo dispatchInfo; - MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel); - dispatchInfo.setKernel(mockKernel.mockKernel); - multiDispatchInfo.push(dispatchInfo); - auto event = std::make_unique>(&pContext, nullptr, 0, 0, 0); - cl_event clEvent = event.get(); - auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr); - auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, &clEvent); - - EXPECT_EQ(baseCommandStreamSize, extendedCommandStreamSize); -} - -HWTEST_F(GetSizeRequiredBufferTest, GivenUserEventForMultiDeviceContextWhenCalculatingCSSizeThenExtraPipeControlIsNotAdded) { - UltClDeviceFactory deviceFactory{2, 0}; - DebugManager.flags.EnableMultiRootDeviceContexts.set(true); - - cl_device_id devices[] = {deviceFactory.rootDevices[0], - deviceFactory.rootDevices[1]}; - - MockContext pContext(ClDeviceVector(devices, 2)); - MockKernelWithInternals mockKernel(*pContext.getDevices()[0]); - DispatchInfo dispatchInfo; - 
MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel); - dispatchInfo.setKernel(mockKernel.mockKernel); - multiDispatchInfo.push(dispatchInfo); - auto userEvent1 = std::make_unique(&pContext); - cl_event clEvent = userEvent1.get(); - auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr); - auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, &clEvent); - - EXPECT_EQ(baseCommandStreamSize, extendedCommandStreamSize); -} - -HWTEST_F(GetSizeRequiredBufferTest, GivenOutEventForMultiDeviceContextWhenCalculatingCSSizeThenExtraPipeControlIsAdded) { - UltClDeviceFactory deviceFactory{2, 0}; - DebugManager.flags.EnableMultiRootDeviceContexts.set(true); - - cl_device_id devices[] = {deviceFactory.rootDevices[0], - deviceFactory.rootDevices[1]}; - - MockContext pContext(ClDeviceVector(devices, 2)); - MockKernelWithInternals mockKernel(*pContext.getDevices()[0]); - DispatchInfo dispatchInfo; - MultiDispatchInfo multiDispatchInfo(mockKernel.mockKernel); - dispatchInfo.setKernel(mockKernel.mockKernel); - multiDispatchInfo.push(dispatchInfo); - auto event = std::make_unique>(&pContext, nullptr, 0, 0, 0); - cl_event clEvent = event.get(); - auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, nullptr); - auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, &clEvent); - - EXPECT_EQ(baseCommandStreamSize + MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pContext.getDevices()[0]->getHardwareInfo(), false), extendedCommandStreamSize); -} - HWTEST_F(GetSizeRequiredBufferTest, 
givenMultipleKernelRequiringSshWhenTotalSizeIsComputedThenItIsProperlyAligned) { auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, pCmdQ->getClDevice()); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index 5602b17916..fb00196b5a 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -1902,7 +1902,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelWhenItIsUnblocke blockedCommandsData->setHeaps(dsh, ioh, ssh); std::vector surfaces; - event->setCommand(std::make_unique(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1, nullptr)); + event->setCommand(std::make_unique(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1)); event->submitCommand(false); EXPECT_EQ(numGrfRequired, csr->savedDispatchFlags.numGrfRequired); @@ -1947,7 +1947,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelWhenInitializeBc auto blockedCommandsData = std::make_unique(cmdStream, *pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); std::vector surfaces; - event->setCommand(std::make_unique(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1, nullptr)); + event->setCommand(std::make_unique(*pCmdQ, blockedCommandsData, surfaces, false, false, false, nullptr, pDevice->getPreemptionMode(), pKernel, 1)); event->submitCommand(false); EXPECT_FALSE(pCmdQ->isCsrLocked); } diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp index 
c7bac440f4..87fb538093 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp @@ -7,14 +7,12 @@ #include "shared/source/command_stream/wait_status.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" -#include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/hw_test.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" -#include "opencl/test/unit_test/mocks/mock_event.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_program.h" #include "opencl/test/unit_test/test_macros/test_checks_ocl.h" @@ -46,18 +44,12 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMu MockGraphicsAllocation svmAlloc(svmPtr, svmSize); Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15); - auto node1 = event1.getMultiRootTimestampSyncNode(); Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20); - auto node3 = event3.getMultiRootTimestampSyncNode(); Event event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); - auto node4 = event4.getMultiRootTimestampSyncNode(); Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7); - auto node5 = event5.getMultiRootTimestampSyncNode(); UserEvent userEvent1(&pCmdQ1->getContext()); - userEvent1.getMultiRootTimestampSyncNode(); UserEvent userEvent2(&pCmdQ2->getContext()); - userEvent2.getMultiRootTimestampSyncNode(); userEvent1.setStatus(CL_COMPLETE); userEvent2.setStatus(CL_COMPLETE); @@ -94,12 +86,12 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMu EXPECT_EQ(2u, semaphores.size()); auto semaphoreCmd0 = 
genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node4->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); - EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node5->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress()); + EXPECT_EQ(7u, semaphoreCmd1->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); } { @@ -122,12 +114,12 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEventInMu EXPECT_EQ(2u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node1->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); - EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node3->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress()); + EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); } alignedFree(svmPtr); } @@ -154,24 +146,17 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo cl_device_id devices[] = {device1, device2, device3}; auto context = std::make_unique(ClDeviceVector(devices, 3), 
false); - auto mockTagAllocator = std::make_unique>(context->getRootDeviceIndices(), device1->getExecutionEnvironment()->memoryManager.get(), 10u); - std::unique_ptr uniquePtr(mockTagAllocator.release()); - context->setMultiRootDeviceTimestampPacketAllocator(uniquePtr); + auto pCmdQ1 = context->getSpecialQueue(1u); auto pCmdQ2 = context->getSpecialQueue(2u); auto pCmdQ3 = context->getSpecialQueue(3u); Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15); - auto node1 = event1.getMultiRootTimestampSyncNode(); Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20); - auto node3 = event3.getMultiRootTimestampSyncNode(); Event event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); - auto node4 = event4.getMultiRootTimestampSyncNode(); Event event5(pCmdQ3, CL_COMMAND_NDRANGE_KERNEL, 7, 21); - auto node5 = event5.getMultiRootTimestampSyncNode(); Event event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7); - auto node6 = event6.getMultiRootTimestampSyncNode(); UserEvent userEvent1(&pCmdQ1->getContext()); UserEvent userEvent2(&pCmdQ2->getContext()); @@ -204,16 +189,16 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo EXPECT_EQ(3u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node4->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); - EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node5->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress()); + EXPECT_EQ(21u, semaphoreCmd1->getSemaphoreDataDword()); + 
EXPECT_EQ(reinterpret_cast(pCmdQ3->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); auto semaphoreCmd2 = genCmdCast(*(semaphores[2])); - EXPECT_EQ(1u, semaphoreCmd2->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node6->getContextEndAddress(0u)), semaphoreCmd2->getSemaphoreGraphicsAddress()); + EXPECT_EQ(7u, semaphoreCmd2->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress()); } { @@ -229,16 +214,16 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenMultipleEventInMultiRoo EXPECT_EQ(3u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node1->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); - EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node3->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress()); + EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); auto semaphoreCmd2 = genCmdCast(*(semaphores[2])); - EXPECT_EQ(1u, semaphoreCmd2->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node5->getContextEndAddress(0u)), semaphoreCmd2->getSemaphoreGraphicsAddress()); + EXPECT_EQ(21u, semaphoreCmd2->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ3->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd2->getSemaphoreGraphicsAddress()); } { @@ -263,8 +248,8 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, 
givenMultipleEventInMultiRoo EXPECT_EQ(1u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node1->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); } } @@ -300,16 +285,11 @@ HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnviro using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15); - event1.getMultiRootTimestampSyncNode(); Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 1, 6); - event3.getMultiRootTimestampSyncNode(); Event event4(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20); - event4.getMultiRootTimestampSyncNode(); Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); - event5.getMultiRootTimestampSyncNode(); Event event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7); - event6.getMultiRootTimestampSyncNode(); UserEvent userEvent1(&pCmdQ1->getContext()); UserEvent userEvent2(&pCmdQ2->getContext()); @@ -335,10 +315,10 @@ HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnviro EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr); CsrDependencies csrDeps; - eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, pCmdQ1->getGpgpuCommandStreamReceiver()); + eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ1->getGpgpuCommandStreamReceiver()); - //EXPECT_EQ(0u, csrDeps.taskCountContainer.size()); - EXPECT_EQ(0u, TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer(csrDeps)); + EXPECT_EQ(0u, csrDeps.taskCountContainer.size()); + EXPECT_EQ(0u, 
TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(csrDeps)); } { @@ -361,10 +341,10 @@ HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnviro EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr); CsrDependencies csrDeps; - eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, pCmdQ2->getGpgpuCommandStreamReceiver()); + eventsRequest.fillCsrDependenciesForTaskCountContainer(csrDeps, pCmdQ2->getGpgpuCommandStreamReceiver()); - EXPECT_EQ(3u, csrDeps.multiRootTimeStampSyncContainer.size()); - EXPECT_EQ(3u * sizeof(MI_SEMAPHORE_WAIT), TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer(csrDeps)); + EXPECT_EQ(3u, csrDeps.taskCountContainer.size()); + EXPECT_EQ(3u * sizeof(MI_SEMAPHORE_WAIT), TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(csrDeps)); } } @@ -424,6 +404,8 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW EXPECT_EQ(0u, semaphores.size()); } userEvent1.setStatus(CL_COMPLETE); + event1->release(); + event2->release(); pCmdQ1->finish(); pCmdQ2->finish(); { @@ -434,7 +416,7 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW EXPECT_EQ(1u, semaphores.size()); auto semaphoreCmd = genCmdCast(*(semaphores[0])); EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(event2->getMultiRootDeviceTimestampPacketNodes()->peekNodes().at(0)->getContextEndAddress(0u)), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress()); } { HardwareParse csHwParser; @@ -443,11 +425,9 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW EXPECT_EQ(1u, semaphores.size()); auto semaphoreCmd = genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); - 
EXPECT_EQ(reinterpret_cast(event1->getMultiRootDeviceTimestampPacketNodes()->peekNodes().at(0)->getContextEndAddress(0u)), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(0u, semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress()); } - event1->release(); - event2->release(); buffer->release(); } @@ -609,6 +589,9 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW EXPECT_EQ(0u, semaphores.size()); } userEvent1.setStatus(CL_COMPLETE); + event1->release(); + event2->release(); + event3->release(); pCmdQ1->finish(); pCmdQ2->finish(); @@ -620,8 +603,7 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW EXPECT_EQ(1u, semaphores.size()); auto semaphoreCmd = genCmdCast(*(semaphores[0])); EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); - auto node = event2->getMultiRootDeviceTimestampPacketNodes()->peekNodes().at(0); - EXPECT_EQ(node->getGpuAddress() + node->getContextEndOffset(), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd->getSemaphoreGraphicsAddress()); } { HardwareParse csHwParser; @@ -637,9 +619,8 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW EXPECT_EQ(2u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword()); - auto node = event1->getMultiRootDeviceTimestampPacketNodes()->peekNodes().at(0); - EXPECT_EQ(node->getGpuAddress() + node->getContextEndOffset(), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(0u, semaphoreCmd0->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); } { HardwareParse csHwParser; @@ -648,9 +629,6 @@ 
HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW EXPECT_LE(1u, semaphores.size()); } - event1->release(); - event2->release(); - event3->release(); buffer->release(); pCmdQ1->release(); pCmdQ2->release(); @@ -900,72 +878,3 @@ HWTEST_F(UltCommandStreamReceiverTest, givenDebugDisablingCacheFlushWhenAddingPi EXPECT_FALSE(pipeControl->getConstantCacheInvalidationEnable()); EXPECT_FALSE(pipeControl->getStateCacheInvalidationEnable()); } -HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyDoNotHaveMultiRootSyncNodeThenCsrDepsDoesNotHaveAnyMultiRootSyncContainer) { - Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15); - Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); - Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 1, 6); - Event event4(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20); - Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); - Event event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7); - UserEvent userEvent1(&pCmdQ1->getContext()); - UserEvent userEvent2(&pCmdQ2->getContext()); - - userEvent1.setStatus(CL_COMPLETE); - userEvent2.setStatus(CL_COMPLETE); - { - cl_event eventWaitList[] = - { - &event1, - &event2, - &event3, - &event4, - &event5, - &event6, - &userEvent1, - }; - cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); - - EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr); - CsrDependencies csrDeps; - eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, pCmdQ2->getGpgpuCommandStreamReceiver()); - - EXPECT_EQ(0u, csrDeps.multiRootTimeStampSyncContainer.size()); - } -} -HWTEST_F(CrossDeviceDependenciesTests, givenMultipleEventInMultiRootDeviceEnvironmentWhenTheyDoNotHaveMultiRootSyncNodeContainersThenCsrDepsDoesNotHaveAnyMultiRootSyncContainer) { - - MockEvent event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15); - event1.multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer()); - MockEvent 
event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); - MockEvent event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20); - event3.multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer()); - MockEvent event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); - event4.multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer()); - MockEvent event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); - event5.multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer()); - MockEvent event6(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7); - event6.multiRootDeviceTimestampPacketContainer.reset(new TimestampPacketContainer()); - UserEvent userEvent1(&pCmdQ1->getContext()); - - userEvent1.setStatus(CL_COMPLETE); - - { - cl_event eventWaitList[] = - { - &event1, - &event2, - &event3, - &event4, - &event5, - &event6, - &userEvent1, - }; - cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); - - EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, nullptr); - CsrDependencies csrDeps; - eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, pCmdQ2->getGpgpuCommandStreamReceiver()); - - EXPECT_EQ(0u, csrDeps.multiRootTimeStampSyncContainer.size()); - } -} \ No newline at end of file diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index add72377fc..f8945bb9c2 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -11,7 +11,6 @@ #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/constants.h" #include "shared/source/helpers/logical_state_helper.h" -#include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include 
"shared/test/common/helpers/engine_descriptor_helper.h" @@ -1217,19 +1216,19 @@ HWTEST_P(BcsDetaliedTestsWithParams, givenBltSizeWithLeftoverWhenDispatchedThenP auto allocation = buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto memoryManager = static_cast(pDevice->getMemoryManager()); memoryManager->returnFakeAllocation = true; - auto blitProperties = BlitProperties::constructPropertiesForReadWrite(std::get<1>(GetParam()), // blitDirection - csr, allocation, // commandStreamReceiver - nullptr, // memObjAllocation - hostPtr, // preallocatedHostAllocation - allocation->getGpuAddress(), // memObjGpuVa - 0, // hostAllocGpuVa - hostPtrOffset, // hostPtrOffset - copyOffset, // copyOffset - bltSize, // copySize - dstRowPitch, // hostRowPitch - dstSlicePitch, // hostSlicePitch - srcRowPitch, // gpuRowPitch - srcSlicePitch // gpuSlicePitch + auto blitProperties = BlitProperties::constructPropertiesForReadWrite(std::get<1>(GetParam()), //blitDirection + csr, allocation, //commandStreamReceiver + nullptr, //memObjAllocation + hostPtr, //preallocatedHostAllocation + allocation->getGpuAddress(), //memObjGpuVa + 0, //hostAllocGpuVa + hostPtrOffset, //hostPtrOffset + copyOffset, //copyOffset + bltSize, //copySize + dstRowPitch, //hostRowPitch + dstSlicePitch, //hostSlicePitch + srcRowPitch, //gpuRowPitch + srcSlicePitch //gpuSlicePitch ); memoryManager->returnFakeAllocation = false; @@ -1322,19 +1321,19 @@ HWTEST_P(BcsDetaliedTestsWithParams, givenBltSizeWithLeftoverWhenDispatchedThenP auto memoryManager = static_cast(pDevice->getMemoryManager()); memoryManager->returnFakeAllocation = true; - auto blitProperties = BlitProperties::constructPropertiesForReadWrite(std::get<1>(GetParam()), // blitDirection - csr, allocation, // commandStreamReceiver - nullptr, // memObjAllocation - hostPtr, // preallocatedHostAllocation - allocation->getGpuAddress(), // memObjGpuVa - 0, // hostAllocGpuVa - hostPtrOffset, // hostPtrOffset - copyOffset, // copyOffset - bltSize, // 
copySize - dstRowPitch, // hostRowPitch - dstSlicePitch, // hostSlicePitch - srcRowPitch, // gpuRowPitch - srcSlicePitch // gpuSlicePitch + auto blitProperties = BlitProperties::constructPropertiesForReadWrite(std::get<1>(GetParam()), //blitDirection + csr, allocation, //commandStreamReceiver + nullptr, //memObjAllocation + hostPtr, //preallocatedHostAllocation + allocation->getGpuAddress(), //memObjGpuVa + 0, //hostAllocGpuVa + hostPtrOffset, //hostPtrOffset + copyOffset, //copyOffset + bltSize, //copySize + dstRowPitch, //hostRowPitch + dstSlicePitch, //hostSlicePitch + srcRowPitch, //gpuRowPitch + srcSlicePitch //gpuSlicePitch ); memoryManager->returnFakeAllocation = false; @@ -1418,16 +1417,16 @@ HWTEST_P(BcsDetaliedTestsWithParams, givenBltSizeWithLeftoverWhenDispatchedThenP size_t buffer2SlicePitch = std::get<0>(GetParam()).srcSlicePitch; auto allocation = buffer1->getGraphicsAllocation(pDevice->getRootDeviceIndex()); - auto blitProperties = BlitProperties::constructPropertiesForCopy(allocation, // dstAllocation - allocation, // srcAllocation - buffer1Offset, // dstOffset - buffer2Offset, // srcOffset - bltSize, // copySize - buffer1RowPitch, // srcRowPitch - buffer1SlicePitch, // srcSlicePitch - buffer2RowPitch, // dstRowPitch - buffer2SlicePitch, // dstSlicePitch - csr.getClearColorAllocation() // clearColorAllocation + auto blitProperties = BlitProperties::constructPropertiesForCopy(allocation, //dstAllocation + allocation, //srcAllocation + buffer1Offset, //dstOffset + buffer2Offset, //srcOffset + bltSize, //copySize + buffer1RowPitch, //srcRowPitch + buffer1SlicePitch, //srcSlicePitch + buffer2RowPitch, //dstRowPitch + buffer2SlicePitch, //dstSlicePitch + csr.getClearColorAllocation() //clearColorAllocation ); flushBcsTask(&csr, blitProperties, true, *pDevice); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp index 2d4c5193ac..78b1a13ac9 
100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp @@ -1789,4 +1789,4 @@ HWTEST_F(BcsTests, givenHostPtrToImageWhenBlitBufferIsCalledThenBlitCmdIsFound) hwParser.parseCommands(csr.commandStream, 0); auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); EXPECT_NE(hwParser.cmdList.end(), cmdIterator); -} \ No newline at end of file +} diff --git a/opencl/test/unit_test/event/event_builder_tests.cpp b/opencl/test/unit_test/event/event_builder_tests.cpp index f5f3e0f065..28d1bcb19b 100644 --- a/opencl/test/unit_test/event/event_builder_tests.cpp +++ b/opencl/test/unit_test/event/event_builder_tests.cpp @@ -79,7 +79,7 @@ TEST(EventBuilder, givenVirtualEventWithCommandThenFinalizeAddChild) { public: using CommandComputeKernel::eventsWaitlist; MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, Kernel *kernel) - : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0, nullptr) {} + : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {} }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); @@ -129,7 +129,7 @@ TEST(EventBuilder, givenVirtualEventWithSubmittedCommandAsParentThenFinalizeNotA public: using CommandComputeKernel::eventsWaitlist; MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, Kernel *kernel) - : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0, nullptr) {} + : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {} }; auto device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp index 05e28a134f..022ad0a1c1 100644 --- a/opencl/test/unit_test/event/event_tests.cpp +++ b/opencl/test/unit_test/event/event_tests.cpp @@ -483,7 +483,7 @@ TEST_F(InternalsEventTest, GivenSubmitCommandFalseWhenSubmittingCommandsThenRefA PreemptionMode preemptionMode = pDevice->getPreemptionMode(); v.push_back(bufferSurf); - auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1, nullptr); + auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1); event.setCommand(std::unique_ptr(cmd)); auto taskLevelBefore = csr.peekTaskLevel(); @@ -526,7 +526,7 @@ TEST_F(InternalsEventTest, GivenSubmitCommandTrueWhenSubmittingCommandsThenRefAp NullSurface *surface = new NullSurface; v.push_back(surface); PreemptionMode preemptionMode = pDevice->getPreemptionMode(); - auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1, nullptr); + auto cmd = new CommandComputeKernel(cmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1); event.setCommand(std::unique_ptr(cmd)); auto taskLevelBefore = csr.peekTaskLevel(); @@ -577,7 +577,7 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut std::vector v; PreemptionMode preemptionMode = pDevice->getPreemptionMode(); - auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1, nullptr); + auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1); event.setCommand(std::unique_ptr(cmd)); event.submitCommand(false); @@ 
-629,7 +629,7 @@ TEST_F(InternalsEventTest, givenGpuHangOnCmdQueueWaitFunctionAndBlockedKernelWit std::vector v; PreemptionMode preemptionMode = pDevice->getPreemptionMode(); - auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1, nullptr); + auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1); event.setCommand(std::unique_ptr(cmd)); event.submitCommand(false); @@ -678,7 +678,7 @@ TEST_F(InternalsEventTest, givenGpuHangOnPrintingEnqueueOutputAndBlockedKernelWi std::vector v; PreemptionMode preemptionMode = pDevice->getPreemptionMode(); - auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1, nullptr); + auto cmd = new CommandComputeKernel(mockCmdQueue, blockedCommandsData, v, false, false, false, std::move(printfHandler), preemptionMode, pKernel, 1); event.setCommand(std::unique_ptr(cmd)); event.submitCommand(false); @@ -1167,7 +1167,7 @@ HWTEST_F(EventTest, givenVirtualEventWhenCommandSubmittedThenLockCsrOccurs) { public: using CommandComputeKernel::eventsWaitlist; MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, Kernel *kernel) - : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0, nullptr) {} + : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {} }; class MockEvent : public Event { public: @@ -1748,7 +1748,7 @@ HWTEST_F(InternalsEventTest, givenAbortedCommandWhenSubmitCalledThenDontUpdateFl blockedCommandsData->setHeaps(dsh, ioh, ssh); PreemptionMode preemptionMode = pDevice->getPreemptionMode(); std::vector v; - auto cmd = new CommandComputeKernel(*pCmdQ, 
blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1, nullptr); + auto cmd = new CommandComputeKernel(*pCmdQ, blockedCommandsData, v, false, false, false, nullptr, preemptionMode, pKernel, 1); event->setCommand(std::unique_ptr(cmd)); FlushStamp expectedFlushStamp = 0; diff --git a/opencl/test/unit_test/gtpin/gtpin_tests.cpp b/opencl/test/unit_test/gtpin/gtpin_tests.cpp index 77e18d5cd8..1cdcdba559 100644 --- a/opencl/test/unit_test/gtpin/gtpin_tests.cpp +++ b/opencl/test/unit_test/gtpin/gtpin_tests.cpp @@ -2434,7 +2434,7 @@ HWTEST_F(GTPinTests, givenGtPinInitializedWhenSubmittingKernelCommandThenFlushed gtpinNotifyKernelSubmit(kernel.mockMultiDeviceKernel, mockCmdQ.get()); - std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1, nullptr)); + std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1)); CompletionStamp stamp = command->submit(20, false); ASSERT_EQ(1u, kernelExecQueue.size()); diff --git a/opencl/test/unit_test/helpers/task_information_tests.cpp b/opencl/test/unit_test/helpers/task_information_tests.cpp index 404a69b640..ead89cf244 100644 --- a/opencl/test/unit_test/helpers/task_information_tests.cpp +++ b/opencl/test/unit_test/helpers/task_information_tests.cpp @@ -154,7 +154,7 @@ TEST(CommandTest, givenWaitlistRequestWhenCommandComputeKernelIsCreatedThenMakeL public: using CommandComputeKernel::eventsWaitlist; MockCommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr &kernelOperation, std::vector &surfaces, Kernel *kernel) - : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0, nullptr) {} + : CommandComputeKernel(commandQueue, kernelOperation, surfaces, false, false, false, nullptr, PreemptionMode::Disabled, kernel, 0) {} }; auto device = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); @@ -291,7 +291,7 @@ HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectD for (auto &surface : surfaces) { requiresCoherency |= surface->IsCoherent; } - std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1, nullptr)); + std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1)); command->submit(20, false); EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode); @@ -339,7 +339,7 @@ HWTEST_F(DispatchFlagsTests, givenClCommandCopyImageWhenSubmitThenFlushTextureCa for (auto &surface : surfaces) { requiresCoherency |= surface->IsCoherent; } - std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, commandType, nullptr, preemptionMode, kernel, 1, nullptr)); + std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, commandType, nullptr, preemptionMode, kernel, 1)); command->submit(20, false); EXPECT_FALSE(mockCsr->passedDispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode); @@ -425,7 +425,7 @@ HWTEST_F(DispatchFlagsTests, givenCommandComputeKernelWhenSubmitThenPassCorrectD bool flushDC = false; bool slmUsed = false; bool ndRangeKernel = false; - std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1, nullptr)); + std::unique_ptr command(new CommandComputeKernel(*mockCmdQ, kernelOperation, surfaces, flushDC, slmUsed, ndRangeKernel, nullptr, preemptionMode, kernel, 1)); command->submit(20, false); EXPECT_TRUE(mockCsr->passedDispatchFlags.epilogueRequired); diff --git a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp 
b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp index d4e224a033..529be63491 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp @@ -34,11 +34,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWriteEnabl auto mockCmdQHw = std::make_unique>(context, device.get(), nullptr); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; - getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr); + getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithDisabled = mockCmdQHw->requestedCmdStreamSize; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; - getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr); + getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize; auto extendedSize = sizeWithDisabled + sizeof(typename FamilyType::PIPE_CONTROL); @@ -52,7 +52,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndOoqWhenEstimat device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, - false, multiDispatchInfo, nullptr, 0, false, false, nullptr); + false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithDisabled = mockCmdQHw->requestedCmdStreamSize; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; @@ -82,7 +82,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledAndOoqWhenEstimat eventsRequest.fillCsrDependenciesForTimestampPacketContainer( csrDeps, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); - 
getCommandStream(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr); + getCommandStream(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize; size_t sizeForNodeDependency = 0; @@ -143,7 +143,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStr auto mockCmdQHw = std::make_unique>(context, device.get(), nullptr); device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = false; - getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr); + getCommandStream(*mockCmdQHw, CsrDependencies(), false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithDisabled = mockCmdQHw->requestedCmdStreamSize; device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; @@ -172,7 +172,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEstimatingStr CsrDependencies csrDeps; eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, device->getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); - getCommandStream(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr); + getCommandStream(*mockCmdQHw, csrDeps, false, false, false, multiDispatchInfo, nullptr, 0, false, false); auto sizeWithEnabled = mockCmdQHw->requestedCmdStreamSize; size_t sizeForNodeDependency = 0; diff --git a/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp b/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp index a996fba7bc..7f5bffca9e 100644 --- a/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_cache_flush_requirements_tests.cpp @@ -212,7 +212,7 @@ HWTEST2_F(KernelWithCacheFlushTests, givenCacheFlushRequiredWhenEstimatingThenAd { 
EXPECT_FALSE(mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ)); - initialSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false, nullptr); + initialSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false); } { @@ -226,7 +226,7 @@ HWTEST2_F(KernelWithCacheFlushTests, givenCacheFlushRequiredWhenEstimatingThenAd ultCsr.multiOsContextCapable = false; EXPECT_TRUE(mockKernel->mockKernel->Kernel::requiresCacheFlushCommand(*cmdQ)); - sizeWithCacheFlush = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false, nullptr); + sizeWithCacheFlush = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, csrDeps, false, false, false, *cmdQ, multiDispatchInfo, false, false); } EXPECT_EQ(initialSize + expectedDiff, sizeWithCacheFlush); diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index a6c4ea6b03..c3c1317286 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -503,7 +503,6 @@ class CommandStreamReceiverMock : public CommandStreamReceiver { using BaseClass::CommandStreamReceiver; TagAllocatorBase *getTimestampPacketAllocator() override { return nullptr; } - std::unique_ptr createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) override { return std::unique_ptr(nullptr); } SubmissionStatus flushTagUpdate() override { return SubmissionStatus::SUCCESS; }; void updateTagFromWait() override{}; diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index 6d6bb63fe2..de4f8e5f14 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ 
b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -765,11 +765,11 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBufferOperationWithoutKernelWhenEstimati auto &hwInfo = cmdQ->getDevice().getHardwareInfo(); auto readBufferCmdsSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_READ_BUFFER, csrDependencies, false, false, - true, *cmdQ, multiDispatchInfo, false, false, nullptr); + true, *cmdQ, multiDispatchInfo, false, false); auto writeBufferCmdsSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_WRITE_BUFFER, csrDependencies, false, false, - true, *cmdQ, multiDispatchInfo, false, false, nullptr); + true, *cmdQ, multiDispatchInfo, false, false); auto copyBufferCmdsSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_COPY_BUFFER, csrDependencies, false, false, - true, *cmdQ, multiDispatchInfo, false, false, nullptr); + true, *cmdQ, multiDispatchInfo, false, false); auto expectedSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); if (cmdQ->isCacheFlushForBcsRequired()) { diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index a0f9db020e..1fe5fc2f00 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -248,7 +248,6 @@ class MockCommandQueueHw : public CommandQueueHw { using BaseClass::latestSentEnqueueType; using BaseClass::obtainCommandStream; using BaseClass::obtainNewTimestampPacketNodes; - using BaseClass::processDispatchForKernels; using BaseClass::requiresCacheFlushAfterWalker; using BaseClass::throttle; using BaseClass::timestampPacketContainer; diff --git a/opencl/test/unit_test/mocks/mock_event.h b/opencl/test/unit_test/mocks/mock_event.h index c579b42596..376e638f39 100644 --- a/opencl/test/unit_test/mocks/mock_event.h +++ b/opencl/test/unit_test/mocks/mock_event.h @@ -39,7 +39,6 @@ struct MockEvent : public BaseEventType { using Event::calculateSubmitTimestampData; using 
Event::isWaitForTimestampsEnabled; using Event::magic; - using Event::multiRootDeviceTimestampPacketContainer; using Event::queueTimeStamp; using Event::submitTimeStamp; using Event::timestampPacketContainer; diff --git a/opencl/test/unit_test/profiling/profiling_tests.cpp b/opencl/test/unit_test/profiling/profiling_tests.cpp index 49467a6ce7..d456c5792d 100644 --- a/opencl/test/unit_test/profiling/profiling_tests.cpp +++ b/opencl/test/unit_test/profiling/profiling_tests.cpp @@ -71,13 +71,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndFor MultiDispatchInfo multiDispatchInfo(&kernel); auto &commandStreamNDRangeKernel = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, - multiDispatchInfo, nullptr, 0, false, false, nullptr); + multiDispatchInfo, nullptr, 0, false, false); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, true, false, *pCmdQ, &kernel, {}); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamNDRangeKernel.getAvailableSpace(), requiredSize); auto &commandStreamTask = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, - multiDispatchInfo, nullptr, 0, false, false, nullptr); + multiDispatchInfo, nullptr, 0, false, false); expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_TASK, true, false, *pCmdQ, &kernel, {}); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize); @@ -93,13 +93,13 @@ HWTEST_F(ProfilingTests, GivenCommandQueueWithProfilingAndForWorkloadWithNoKerne MultiDispatchInfo multiDispatchInfo(nullptr); auto &commandStreamMigrateMemObjects = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, - multiDispatchInfo, nullptr, 0, false, false, nullptr); + multiDispatchInfo, nullptr, 0, false, false); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_MIGRATE_MEM_OBJECTS, true, false, *pCmdQ, nullptr, {}); EXPECT_GE(expectedSizeCS, requiredSize); 
EXPECT_GE(commandStreamMigrateMemObjects.getAvailableSpace(), requiredSize); auto &commandStreamMarker = getCommandStream(*pCmdQ, CsrDependencies(), true, - false, false, multiDispatchInfo, nullptr, 0, false, false, nullptr); + false, false, multiDispatchInfo, nullptr, 0, false, false); expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_MARKER, true, false, *pCmdQ, nullptr, {}); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamMarker.getAvailableSpace(), requiredSize); @@ -121,9 +121,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProfilingAndFor multiDispatchInfo.push(dispatchInfo); multiDispatchInfo.push(dispatchInfo); auto &commandStreamTask = getCommandStream(*pCmdQ, CsrDependencies(), true, false, false, - multiDispatchInfo, nullptr, 0, false, false, nullptr); + multiDispatchInfo, nullptr, 0, false, false); auto expectedSizeCS = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_TASK, CsrDependencies(), true, false, - false, *pCmdQ, multiDispatchInfo, false, false, nullptr); + false, *pCmdQ, multiDispatchInfo, false, false); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamTask.getAvailableSpace(), requiredSize); } @@ -741,13 +741,13 @@ HWTEST_F(ProfilingWithPerfCountersTests, GivenCommandQueueWithProfilingPerfCount MultiDispatchInfo multiDispatchInfo(nullptr); auto &commandStreamMigrateMemObjects = getCommandStream(*pCmdQ, CsrDependencies(), true, true, false, multiDispatchInfo, - nullptr, 0, false, false, nullptr); + nullptr, 0, false, false); auto expectedSizeCS = EnqueueOperation::getSizeRequiredCS(CL_COMMAND_MIGRATE_MEM_OBJECTS, true, true, *pCmdQ, nullptr, {}); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamMigrateMemObjects.getAvailableSpace(), requiredSize); auto &commandStreamMarker = getCommandStream(*pCmdQ, CsrDependencies(), true, true, false, - multiDispatchInfo, nullptr, 0, false, false, nullptr); + multiDispatchInfo, nullptr, 0, false, false); expectedSizeCS = 
EnqueueOperation::getSizeRequiredCS(CL_COMMAND_MARKER, true, true, *pCmdQ, nullptr, {}); EXPECT_GE(expectedSizeCS, requiredSize); EXPECT_GE(commandStreamMarker.getAvailableSpace(), requiredSize); diff --git a/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp b/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp index 9f929df14f..e75268af99 100644 --- a/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp +++ b/opencl/test/unit_test/xe_hpc_core/pvc/command_stream_receiver_hw_tests_pvc.cpp @@ -243,14 +243,10 @@ PVCTEST_F(PvcMultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEvent MockGraphicsAllocation svmAlloc(svmPtr, svmSize); Event event1(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 5, 15); - auto node1 = event1.getMultiRootTimestampSyncNode(); Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, 6, 16); Event event3(pCmdQ1, CL_COMMAND_NDRANGE_KERNEL, 4, 20); - auto node3 = event3.getMultiRootTimestampSyncNode(); Event event4(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 3, 4); - auto node4 = event4.getMultiRootTimestampSyncNode(); Event event5(pCmdQ2, CL_COMMAND_NDRANGE_KERNEL, 2, 7); - auto node5 = event5.getMultiRootTimestampSyncNode(); UserEvent userEvent1(&pCmdQ1->getContext()); UserEvent userEvent2(&pCmdQ2->getContext()); @@ -289,12 +285,12 @@ PVCTEST_F(PvcMultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEvent EXPECT_EQ(3u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node4->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(4u, semaphoreCmd0->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); - EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword()); - 
EXPECT_EQ(reinterpret_cast(node5->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress()); + EXPECT_EQ(7u, semaphoreCmd1->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ2->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); } { @@ -317,12 +313,12 @@ PVCTEST_F(PvcMultiRootDeviceCommandStreamReceiverBufferTests, givenMultipleEvent EXPECT_EQ(3u, semaphores.size()); auto semaphoreCmd0 = genCmdCast(*(semaphores[0])); - EXPECT_EQ(1u, semaphoreCmd0->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node1->getContextEndAddress(0u)), semaphoreCmd0->getSemaphoreGraphicsAddress()); + EXPECT_EQ(15u, semaphoreCmd0->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd0->getSemaphoreGraphicsAddress()); auto semaphoreCmd1 = genCmdCast(*(semaphores[1])); - EXPECT_EQ(1u, semaphoreCmd1->getSemaphoreDataDword()); - EXPECT_EQ(reinterpret_cast(node3->getContextEndAddress(0u)), semaphoreCmd1->getSemaphoreGraphicsAddress()); + EXPECT_EQ(20u, semaphoreCmd1->getSemaphoreDataDword()); + EXPECT_EQ(reinterpret_cast(pCmdQ1->getGpgpuCommandStreamReceiver().getTagAddress()), semaphoreCmd1->getSemaphoreGraphicsAddress()); } alignedFree(svmPtr); } diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index bb184ace2f..b91ca820ac 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -221,7 +221,6 @@ class CommandStreamReceiver { TagAllocatorBase *getEventTsAllocator(); TagAllocatorBase *getEventPerfCountAllocator(const uint32_t tagSize); virtual TagAllocatorBase *getTimestampPacketAllocator() = 0; - virtual std::unique_ptr createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) = 0; virtual bool expectMemory(const void *gfxAddress, const void *srcAddress, size_t 
length, uint32_t compareOperation); diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index 38cec769c6..730a909b7c 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -134,7 +134,6 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { GraphicsAllocation *getClearColorAllocation() override; TagAllocatorBase *getTimestampPacketAllocator() override; - std::unique_ptr createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) override; void postInitFlagsSetup() override; void programActivePartitionConfig(LinearStream &csr); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 7c355910a3..c48bb794a2 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -403,7 +403,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( auto commandStreamStartCSR = commandStreamCSR.getUsed(); TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer(commandStreamCSR, dispatchFlags.csrDependencies); - TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer(commandStreamCSR, dispatchFlags.csrDependencies); + TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer(commandStreamCSR, dispatchFlags.csrDependencies); programActivePartitionConfigFlushTask(commandStreamCSR); programEngineModeCommands(commandStreamCSR, dispatchFlags); @@ -977,7 +977,7 @@ size_t CommandStreamReceiverHw::getRequiredCmdStreamSize(const Dispat } size += TimestampPacketHelper::getRequiredCmdStreamSize(dispatchFlags.csrDependencies); - size += TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer(dispatchFlags.csrDependencies); + 
size += TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(dispatchFlags.csrDependencies); size += EncodeKernelArgsBuffer::getKernelArgsBufferCmdsSize(kernelArgsBufferAllocation, logicalStateHelper.get()); @@ -1193,7 +1193,7 @@ TaskCountType CommandStreamReceiverHw::flushBcsTask(const BlitPropert for (auto &blitProperties : blitPropertiesContainer) { TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer(commandStream, blitProperties.csrDependencies); - TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer(commandStream, blitProperties.csrDependencies); + TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer(commandStream, blitProperties.csrDependencies); BlitCommandsHelper::encodeWa(commandStream, blitProperties, latestSentBcsWaValue); @@ -1226,12 +1226,6 @@ TaskCountType CommandStreamReceiverHw::flushBcsTask(const BlitPropert if (blitProperties.clearColorAllocation) { makeResident(*blitProperties.clearColorAllocation); } - if (blitProperties.multiRootDeviceEventSync != nullptr) { - MiFlushArgs args; - args.commandWithPostSync = true; - args.notifyEnable = isUsedNotifyEnableForPostSync(); - EncodeMiFlushDW::programMiFlushDw(commandStream, blitProperties.multiRootDeviceEventSync->getGpuAddress() + blitProperties.multiRootDeviceEventSync->getContextEndOffset(), std::numeric_limits::max(), args, hwInfo); - } } BlitCommandsHelper::programGlobalSequencerFlush(commandStream); @@ -1248,6 +1242,7 @@ TaskCountType CommandStreamReceiverHw::flushBcsTask(const BlitPropert MemorySynchronizationCommands::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), false, peekHwInfo()); } + if (PauseOnGpuProperties::pauseModeAllowed(DebugManager.flags.PauseOnBlitCopy.get(), taskCount, PauseOnGpuProperties::PauseMode::AfterWorkload)) { BlitCommandsHelper::dispatchDebugPauseCommands(commandStream, getDebugPauseStateGPUAddress(), DebugPauseState::waitingForUserEndConfirmation, @@ -1524,11 
+1519,6 @@ TagAllocatorBase *CommandStreamReceiverHw::getTimestampPacketAllocato return timestampPacketAllocator.get(); } -template -std::unique_ptr CommandStreamReceiverHw::createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) { - auto &gfxCoreHelper = getGfxCoreHelper(); - return gfxCoreHelper.createTimestampPacketAllocator(rootDeviceIndices, getMemoryManager(), getPreferredTagPoolSize(), getType(), osContext->getDeviceBitfield()); -} template void CommandStreamReceiverHw::postInitFlagsSetup() { useNewResourceImplicitFlush = checkPlatformSupportsNewResourceImplicitFlush(); diff --git a/shared/source/command_stream/csr_deps.h b/shared/source/command_stream/csr_deps.h index 7ba5c46615..a601179e88 100644 --- a/shared/source/command_stream/csr_deps.h +++ b/shared/source/command_stream/csr_deps.h @@ -22,7 +22,7 @@ class CsrDependencies { All }; - StackVec multiRootTimeStampSyncContainer; + StackVec, 32> taskCountContainer; StackVec timestampPacketContainer; void makeResident(CommandStreamReceiver &commandStreamReceiver) const; diff --git a/shared/source/helpers/blit_commands_helper.cpp b/shared/source/helpers/blit_commands_helper.cpp index b3c198eacc..57cb67c5de 100644 --- a/shared/source/helpers/blit_commands_helper.cpp +++ b/shared/source/helpers/blit_commands_helper.cpp @@ -50,7 +50,6 @@ BlitProperties BlitProperties::constructPropertiesForReadWrite(BlitterConstants: BlitterConstants::BlitDirection::HostPtrToImage == blitDirection) { return { nullptr, // outputTimestampPacket - nullptr, // multiRootDeviceEventSync blitDirection, // blitDirection {}, // csrDependencies AuxTranslationDirection::None, // auxTranslationDirection @@ -74,7 +73,6 @@ BlitProperties BlitProperties::constructPropertiesForReadWrite(BlitterConstants: } else { return { nullptr, // outputTimestampPacket - nullptr, // multiRootDeviceEventSync blitDirection, // blitDirection {}, // csrDependencies AuxTranslationDirection::None, // auxTranslationDirection 
@@ -106,7 +104,6 @@ BlitProperties BlitProperties::constructPropertiesForCopy(GraphicsAllocation *ds return { nullptr, // outputTimestampPacket - nullptr, // multiRootDeviceEventSync BlitterConstants::BlitDirection::BufferToBuffer, // blitDirection {}, // csrDependencies AuxTranslationDirection::None, // auxTranslationDirection @@ -131,7 +128,6 @@ BlitProperties BlitProperties::constructPropertiesForAuxTranslation(AuxTranslati auto allocationSize = allocation->getUnderlyingBufferSize(); return { nullptr, // outputTimestampPacket - nullptr, // multiRootDeviceEventSync BlitterConstants::BlitDirection::BufferToBuffer, // blitDirection {}, // csrDependencies auxTranslationDirection, // auxTranslationDirection diff --git a/shared/source/helpers/blit_commands_helper.h b/shared/source/helpers/blit_commands_helper.h index 2c7ef5f0fd..f5f4d345bd 100644 --- a/shared/source/helpers/blit_commands_helper.h +++ b/shared/source/helpers/blit_commands_helper.h @@ -61,7 +61,6 @@ struct BlitProperties { CommandStreamReceiver &gpguCsr, CommandStreamReceiver &bcsCsr); TagNodeBase *outputTimestampPacket = nullptr; - TagNodeBase *multiRootDeviceEventSync = nullptr; BlitterConstants::BlitDirection blitDirection = BlitterConstants::BlitDirection::BufferToHostPtr; CsrDependencies csrDependencies; AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None; diff --git a/shared/source/helpers/blit_commands_helper_base.inl b/shared/source/helpers/blit_commands_helper_base.inl index 9c5aae061d..0507ecce64 100644 --- a/shared/source/helpers/blit_commands_helper_base.inl +++ b/shared/source/helpers/blit_commands_helper_base.inl @@ -125,7 +125,7 @@ size_t BlitCommandsHelper::estimateBlitCommandSize(const Vec3 sizePerBlit += estimatePostBlitCommandSize(); return TimestampPacketHelper::getRequiredCmdStreamSize(csrDependencies) + - TimestampPacketHelper::getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer(csrDependencies) + + 
TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(csrDependencies) + (sizePerBlit * nBlits) + timestampCmdSize + estimatePreBlitCommandSize(); @@ -141,9 +141,6 @@ size_t BlitCommandsHelper::estimateBlitCommandsSize(const BlitPropert auto isImage = blitProperties.isImageOperation(); size += BlitCommandsHelper::estimateBlitCommandSize(blitProperties.copySize, blitProperties.csrDependencies, updateTimestampPacket, profilingEnabled, isImage, rootDeviceEnvironment, blitProperties.isSystemMemoryPoolUsed); - if (blitProperties.multiRootDeviceEventSync != nullptr) { - size += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); - } } size += BlitCommandsHelper::getWaCmdsSize(blitPropertiesContainer); size += 2 * MemorySynchronizationCommands::getSizeForAdditonalSynchronization(*rootDeviceEnvironment.getHardwareInfo()); diff --git a/shared/source/helpers/timestamp_packet.h b/shared/source/helpers/timestamp_packet.h index dae11adb4e..c42aa3d5c3 100644 --- a/shared/source/helpers/timestamp_packet.h +++ b/shared/source/helpers/timestamp_packet.h @@ -145,11 +145,17 @@ struct TimestampPacketHelper { } template - static void programCsrDependenciesForForMultiRootDeviceSyncContainer(LinearStream &cmdStream, const CsrDependencies &csrDependencies) { - for (auto timestampPacketContainer : csrDependencies.multiRootTimeStampSyncContainer) { - for (auto &node : timestampPacketContainer->peekNodes()) { - TimestampPacketHelper::programSemaphore(cmdStream, *node); - } + static void programCsrDependenciesForForTaskCountContainer(LinearStream &cmdStream, const CsrDependencies &csrDependencies) { + auto &taskCountContainer = csrDependencies.taskCountContainer; + + for (auto &[taskCountPreviousRootDevice, tagAddressPreviousRootDevice] : taskCountContainer) { + using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; + using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; + + EncodeSempahore::addMiSemaphoreWaitCommand(cmdStream, + 
static_cast(tagAddressPreviousRootDevice), + static_cast(taskCountPreviousRootDevice), + COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); } } @@ -212,8 +218,8 @@ struct TimestampPacketHelper { } template - static size_t getRequiredCmdStreamSizeForMultiRootDeviceSyncNodesContainer(const CsrDependencies &csrDependencies) { - return csrDependencies.multiRootTimeStampSyncContainer.size() * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT); + static size_t getRequiredCmdStreamSizeForTaskCountContainer(const CsrDependencies &csrDependencies) { + return csrDependencies.taskCountContainer.size() * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT); } }; diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index 4b3e21b209..00bd156ba3 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -17,7 +17,6 @@ #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/os_interface/os_context.h" -#include "shared/source/utilities/tag_allocator.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include @@ -94,7 +93,6 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { }; TagAllocatorBase *getTimestampPacketAllocator() override { return nullptr; } - std::unique_ptr createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) override { return std::unique_ptr(nullptr); } CompletionStamp flushTask( LinearStream &commandStream, diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 9a8cd4323a..bba0fd020b 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -15,13 
+15,10 @@ #include "shared/source/helpers/api_specific_config.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/surface.h" -#include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/utilities/tag_allocator.h" -#include "shared/test/common/cmd_parse/gen_cmd_parse.h" -#include "shared/test/common/cmd_parse/hw_parse.h" #include "shared/test/common/fixtures/command_stream_receiver_fixture.inl" #include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/helpers/batch_buffer_helper.h" @@ -35,7 +32,6 @@ #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_internal_allocation_storage.h" #include "shared/test/common/mocks/mock_memory_manager.h" -#include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/hw_test.h" #include "shared/test/common/test_macros/test_checks_shared.h" @@ -2465,89 +2461,3 @@ HWTEST_F(CommandStreamReceiverHwTest, givenVariousCsrModeWhenGettingTbxModeThenE ultCsr.commandStreamReceiverType = CommandStreamReceiverType::CSR_TBX_WITH_AUB; EXPECT_TRUE(ultCsr.isTbxMode()); } - -HWTEST_F(CommandStreamReceiverHwTest, GivenTwoRootDevicesWhengetMultiRootDeviceTimestampPacketAllocatorCalledThenAllocatorForTwoDevicesCreated) { - auto executionEnvironment = std::make_unique(defaultHwInfo.get(), true, 2u); - auto devices = DeviceFactory::createDevices(*executionEnvironment.release()); - const RootDeviceIndicesContainer indices = {0u, 1u}; - auto csr = devices[0]->getDefaultEngine().commandStreamReceiver; - auto allocator = csr->createMultiRootDeviceTimestampPacketAllocator(indices); - class MockTagAllocatorBase : public TagAllocatorBase { 
- public: - using TagAllocatorBase::maxRootDeviceIndex; - }; - EXPECT_EQ(reinterpret_cast(allocator.get())->maxRootDeviceIndex, 1u); -} -HWTEST_F(CommandStreamReceiverHwTest, GivenFiveRootDevicesWhengetMultiRootDeviceTimestampPacketAllocatorCalledThenAllocatorForFiveDevicesCreated) { - auto executionEnvironment = std::make_unique(defaultHwInfo.get(), true, 4u); - auto devices = DeviceFactory::createDevices(*executionEnvironment.release()); - const RootDeviceIndicesContainer indices = {0u, 1u, 2u, 3u}; - auto csr = devices[0]->getDefaultEngine().commandStreamReceiver; - auto allocator = csr->createMultiRootDeviceTimestampPacketAllocator(indices); - class MockTagAllocatorBase : public TagAllocatorBase { - public: - using TagAllocatorBase::maxRootDeviceIndex; - }; - EXPECT_EQ(reinterpret_cast(allocator.get())->maxRootDeviceIndex, 3u); -} -HWTEST_F(CommandStreamReceiverHwTest, givenMultiRootDeviceSyncNodeWhenFlushBcsTAskThenMiFlushAdded) { - using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; - auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - auto mockTagAllocator = std::make_unique>(pDevice->getRootDeviceIndex(), pDevice->getExecutionEnvironment()->memoryManager.get(), 10u); - - auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr, - commandStreamReceiver, commandStreamReceiver.getTagAllocation(), nullptr, - commandStreamReceiver.getTagAllocation()->getUnderlyingBuffer(), - commandStreamReceiver.getTagAllocation()->getGpuAddress(), 0, - 0, 0, 0, 0, 0, 0, 0); - auto tag = mockTagAllocator->getTag(); - blitProperties.multiRootDeviceEventSync = tag; - - BlitPropertiesContainer container; - container.push_back(blitProperties); - commandStreamReceiver.flushBcsTask(container, true, false, *pDevice); - HardwareParse hwParser; - hwParser.parseCommands(commandStreamReceiver.commandStream, 0); - - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - bool 
nodeAddressFound = false; - while (cmdIterator != hwParser.cmdList.end()) { - auto flush = genCmdCast(*cmdIterator); - if (flush->getDestinationAddress() == tag->getGpuAddress() + tag->getContextEndOffset()) { - nodeAddressFound = true; - break; - } - cmdIterator = find(++cmdIterator, hwParser.cmdList.end()); - } - EXPECT_TRUE(nodeAddressFound); -} -HWTEST_F(CommandStreamReceiverHwTest, givenNullPtrAsMultiRootDeviceSyncNodeWhenFlushBcsTAskThenMiFlushNotAdded) { - using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; - auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - auto mockTagAllocator = std::make_unique>(pDevice->getRootDeviceIndex(), pDevice->getExecutionEnvironment()->memoryManager.get(), 10u); - - auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::BufferToHostPtr, - commandStreamReceiver, commandStreamReceiver.getTagAllocation(), nullptr, - commandStreamReceiver.getTagAllocation()->getUnderlyingBuffer(), - commandStreamReceiver.getTagAllocation()->getGpuAddress(), 0, - 0, 0, 0, 0, 0, 0, 0); - auto tag = mockTagAllocator->getTag(); - - BlitPropertiesContainer container; - container.push_back(blitProperties); - commandStreamReceiver.flushBcsTask(container, true, false, *pDevice); - HardwareParse hwParser; - hwParser.parseCommands(commandStreamReceiver.commandStream, 0); - - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - bool nodeAddressFound = false; - while (cmdIterator != hwParser.cmdList.end()) { - auto flush = genCmdCast(*cmdIterator); - if (flush->getDestinationAddress() == tag->getGpuAddress() + tag->getContextEndOffset()) { - nodeAddressFound = true; - break; - } - cmdIterator = find(++cmdIterator, hwParser.cmdList.end()); - } - EXPECT_FALSE(nodeAddressFound); -} \ No newline at end of file diff --git a/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp b/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp index 
1575ecc7eb..e0df0e40ff 100644 --- a/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp +++ b/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp @@ -15,7 +15,6 @@ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" -#include "shared/test/common/mocks/mock_timestamp_container.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test_checks_shared.h" @@ -664,25 +663,3 @@ HWTEST2_F(BlitTests, givenPlatformWhenCallingDispatchPreBlitCommandThenNoneMiFlu auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); ASSERT_EQ(hwParser.cmdList.end(), cmdIterator); } - -HWTEST_F(BlitTests, givenPlatformWhenCallingDispatchPreBlitCommandThenNoneMiFlushDwIsProgramed) { - auto mockTagAllocator = std::make_unique>(pDevice->getRootDeviceIndex(), pDevice->getExecutionEnvironment()->memoryManager.get(), 10u); - auto tag = mockTagAllocator->getTag(); - BlitProperties blitProperties{}; - blitProperties.copySize = {1, 1, 1}; - BlitPropertiesContainer blitPropertiesContainer1; - blitPropertiesContainer1.push_back(blitProperties); - blitPropertiesContainer1.push_back(blitProperties); - blitPropertiesContainer1.push_back(blitProperties); - - auto estimatedSizeWithoutNode = BlitCommandsHelper::estimateBlitCommandsSize( - blitPropertiesContainer1, false, true, false, pDevice->getRootDeviceEnvironment()); - blitProperties.multiRootDeviceEventSync = tag; - BlitPropertiesContainer blitPropertiesContainer2; - blitPropertiesContainer2.push_back(blitProperties); - blitPropertiesContainer2.push_back(blitProperties); - blitPropertiesContainer2.push_back(blitProperties); - auto estimatedSizeWithNode = BlitCommandsHelper::estimateBlitCommandsSize( - blitPropertiesContainer2, false, true, false, pDevice->getRootDeviceEnvironment()); - EXPECT_NE(estimatedSizeWithoutNode, 
estimatedSizeWithNode); -} \ No newline at end of file diff --git a/shared/test/unit_test/helpers/timestamp_packet_tests.cpp b/shared/test/unit_test/helpers/timestamp_packet_tests.cpp index 6af9bae1e9..5cfe1675d4 100644 --- a/shared/test/unit_test/helpers/timestamp_packet_tests.cpp +++ b/shared/test/unit_test/helpers/timestamp_packet_tests.cpp @@ -301,35 +301,3 @@ HWTEST_F(DeviceTimestampPacketTests, givenDebugFlagSetWhenCreatingTimestampPacke EXPECT_FALSE(tag->canBeReleased()); } - -using TimestampPacketHelperTests = Test; - -HWTEST_F(TimestampPacketHelperTests, givenTagNodesInMultiRootSyncContainerWhenProgramingDependensiecThenSemaforesAreProgrammed) { - StackVec buffer(4096); - LinearStream cmdStream(buffer.begin(), buffer.size()); - CsrDependencies deps; - auto mockTagAllocator = std::make_unique>(0, pDevice->getMemoryManager()); - TimestampPacketContainer container = {}; - container.add(mockTagAllocator->getTag()); - deps.multiRootTimeStampSyncContainer.push_back(&container); - TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer(cmdStream, deps); - EXPECT_EQ(cmdStream.getUsed(), sizeof(typename FamilyType::MI_SEMAPHORE_WAIT)); -} - -HWTEST_F(TimestampPacketHelperTests, givenEmptyContainerMultiRootSyncContainerWhenProgramingDependensiecThenZeroSemaforesAreProgrammed) { - StackVec buffer(4096); - LinearStream cmdStream(buffer.begin(), buffer.size()); - CsrDependencies deps; - TimestampPacketContainer container = {}; - deps.multiRootTimeStampSyncContainer.push_back(&container); - TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer(cmdStream, deps); - EXPECT_EQ(cmdStream.getUsed(), 0u); -} - -HWTEST_F(TimestampPacketHelperTests, givenEmptyMultiRootSyncContainerWhenProgramingDependensiecThenZeroSemaforesAreProgrammed) { - StackVec buffer(4096); - LinearStream cmdStream(buffer.begin(), buffer.size()); - CsrDependencies deps; - 
TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer(cmdStream, deps); - EXPECT_EQ(cmdStream.getUsed(), 0u); -} \ No newline at end of file