diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 33485970bb..af9feba981 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -131,6 +131,7 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr } this->heaplessModeEnabled = compilerProductHelper.isHeaplessModeEnabled(); + this->heaplessStateInitEnabled = compilerProductHelper.isHeaplessStateInitEnabled(); } } diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index 33558df257..92a0e39bb5 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -383,6 +383,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { void handlePostCompletionOperations(bool checkQueueCompletion); bool getHeaplessModeEnabled() const { return this->heaplessModeEnabled; } + bool getHeaplessStateInitEnabled() const { return this->heaplessStateInitEnabled; } bool isBcsSplitInitialized() const { return this->bcsSplitInitialized; } @@ -480,6 +481,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { bool splitBarrierRequired = false; bool gpgpuCsrClientRegistered = false; bool heaplessModeEnabled = false; + bool heaplessStateInitEnabled = false; }; template diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 59b7851dc7..30a987bf8d 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -954,15 +954,25 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( } PRINT_DEBUG_STRING(debugManager.flags.PrintDebugMessages.get(), stdout, "preemption = %d.\n", static_cast(dispatchFlags.preemptionMode)); - CompletionStamp completionStamp = getGpgpuCommandStreamReceiver().flushTask( - commandStream, - commandStreamStart, - dsh, - ioh, - &getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), - taskLevel, - dispatchFlags, - getDevice()); + + CompletionStamp completionStamp = getHeaplessStateInitEnabled() ? getGpgpuCommandStreamReceiver().flushTaskStateless( + commandStream, + commandStreamStart, + dsh, + ioh, + &getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), + taskLevel, + dispatchFlags, + getDevice()) + : getGpgpuCommandStreamReceiver().flushTask( + commandStream, + commandStreamStart, + dsh, + ioh, + &getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), + taskLevel, + dispatchFlags, + getDevice()); if (isHandlingBarrier) { clearLastBcsPackets(); @@ -1172,15 +1182,24 @@ CompletionStamp CommandQueueHw::enqueueCommandWithoutKernel( dispatchFlags.csrDependencies.makeResident(getGpgpuCommandStreamReceiver()); } - completionStamp = getGpgpuCommandStreamReceiver().flushTask( - *commandStream, - commandStreamStart, - &getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), - &getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), - &getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), - taskLevel, - dispatchFlags, - getDevice()); + completionStamp = getHeaplessStateInitEnabled() ? getGpgpuCommandStreamReceiver().flushTaskStateless( + *commandStream, + commandStreamStart, + &getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), + &getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), + &getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), + taskLevel, + dispatchFlags, + getDevice()) + : getGpgpuCommandStreamReceiver().flushTask( + *commandStream, + commandStreamStart, + &getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), + &getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), + &getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), + taskLevel, + dispatchFlags, + getDevice()); if (isHandlingBarrier) { clearLastBcsPackets(); diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index ef872d832e..616788226e 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -90,14 +90,22 @@ CompletionStamp &CommandMapUnmap::submit(TaskCountType taskLevel, bool terminate gtpinNotifyPreFlushTask(&commandQueue); - completionStamp = commandStreamReceiver.flushTask(queueCommandStream, - offset, - &commandQueue.getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), - &commandQueue.getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), - &commandQueue.getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), - taskLevel, - dispatchFlags, - commandQueue.getDevice()); + completionStamp = commandQueue.getHeaplessStateInitEnabled() ? commandStreamReceiver.flushTaskStateless(queueCommandStream, + offset, + &commandQueue.getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), + &commandQueue.getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), + &commandQueue.getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), + taskLevel, + dispatchFlags, + commandQueue.getDevice()) + : commandStreamReceiver.flushTask(queueCommandStream, + offset, + &commandQueue.getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), + &commandQueue.getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), + &commandQueue.getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), + taskLevel, + dispatchFlags, + commandQueue.getDevice()); commandQueue.updateLatestSentEnqueueType(EnqueueProperties::Operation::dependencyResolveOnGpu); @@ -255,14 +263,22 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term } } - completionStamp = commandStreamReceiver.flushTask(*kernelOperation->commandStream, - 0, - dsh, - ioh, - ssh, - taskLevel, - dispatchFlags, - commandQueue.getDevice()); + completionStamp = commandQueue.getHeaplessStateInitEnabled() ? commandStreamReceiver.flushTaskStateless(*kernelOperation->commandStream, + 0, + dsh, + ioh, + ssh, + taskLevel, + dispatchFlags, + commandQueue.getDevice()) + : commandStreamReceiver.flushTask(*kernelOperation->commandStream, + 0, + dsh, + ioh, + ssh, + taskLevel, + dispatchFlags, + commandQueue.getDevice()); if (isHandlingBarrier) { commandQueue.clearLastBcsPackets(); @@ -413,14 +429,22 @@ CompletionStamp &CommandWithoutKernel::submit(TaskCountType taskLevel, bool term gtpinNotifyPreFlushTask(&commandQueue); - completionStamp = commandStreamReceiver.flushTask(*kernelOperation->commandStream, - 0, - &commandQueue.getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), - &commandQueue.getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), - &commandQueue.getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), - taskLevel, - dispatchFlags, - commandQueue.getDevice()); + completionStamp = commandQueue.getHeaplessStateInitEnabled() ? commandStreamReceiver.flushTaskStateless(*kernelOperation->commandStream, + 0, + &commandQueue.getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), + &commandQueue.getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), + &commandQueue.getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), + taskLevel, + dispatchFlags, + commandQueue.getDevice()) + : commandStreamReceiver.flushTask(*kernelOperation->commandStream, + 0, + &commandQueue.getIndirectHeap(IndirectHeap::Type::dynamicState, 0u), + &commandQueue.getIndirectHeap(IndirectHeap::Type::indirectObject, 0u), + &commandQueue.getIndirectHeap(IndirectHeap::Type::surfaceState, 0u), + taskLevel, + dispatchFlags, + commandQueue.getDevice()); if (isHandlingBarrier) { commandQueue.clearLastBcsPackets(); diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp index 38e276bc3e..b1983020b0 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp @@ -242,7 +242,7 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, whenFlushTagUpdateThenSetStallingCmd HWTEST_TEMPLATED_F(BlitAuxTranslationTests, whenInitializeDeviceWithFirstSubmissionThenMiFlushDwIsFlushed) { using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; - EXPECT_EQ(SubmissionStatus::success, bcsCsr->initializeDeviceWithFirstSubmission()); + EXPECT_EQ(SubmissionStatus::success, bcsCsr->initializeDeviceWithFirstSubmission(device->getDevice())); auto cmdListBcs = getCmdList(bcsCsr->getCS(0), 0); diff --git a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp index cfc26985ee..5df8ea8545 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -171,6 +171,12 @@ struct CreateAllocationForHostSurfaceCsr : public CommandStreamReceiverHw(0u)}; } + + CompletionStamp flushTaskStateless(LinearStream &commandStream, size_t commandStreamStart, + const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { + return CompletionStamp{0u, 0u, static_cast(0u)}; + } }; HWTEST_F(EnqueueReadImageTest, givenCommandQueueAndPtrCopyAllowedForHostSurfaceWhenBlockingEnqueueReadImageThenSuccessIsReturned) { diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 84f2efb682..c1a8231e70 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -558,6 +558,19 @@ class CommandStreamReceiverMock : public CommandStreamReceiver { return cs; } + CompletionStamp flushTaskStateless( + LinearStream &commandStream, + size_t commandStreamStart, + const IndirectHeap *dsh, + const IndirectHeap *ioh, + const IndirectHeap *ssh, + TaskCountType taskLevel, + DispatchFlags &dispatchFlags, + Device &device) override { + CompletionStamp cs = {}; + return cs; + } + CompletionStamp flushImmediateTask( LinearStream &immediateCommandStream, size_t immediateCommandStreamStart, @@ -601,7 +614,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver { } void postInitFlagsSetup() override {} - SubmissionStatus initializeDeviceWithFirstSubmission() override { return SubmissionStatus::success; } + SubmissionStatus initializeDeviceWithFirstSubmission(Device &device) override { return SubmissionStatus::success; } std::map residency; std::unique_ptr mockExecutionEnvironment; diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index 9ad107960b..baa71962ad 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -272,6 +272,8 @@ class MockCommandQueueHw : public CommandQueueHw { using BaseClass::deferredTimestampPackets; using BaseClass::getDevice; using BaseClass::gpgpuEngine; + using BaseClass::heaplessModeEnabled; + using BaseClass::heaplessStateInitEnabled; using BaseClass::isBlitAuxTranslationRequired; using BaseClass::isCompleted; using BaseClass::latestSentEnqueueType; diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 360010a4eb..c35d05e4d9 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -401,8 +401,8 @@ struct EncodeComputeMode { const PipelineSelectArgs &args, bool hasSharedHandles, const RootDeviceEnvironment &rootDeviceEnvironment, bool isRcs, bool dcFlush); static void programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, const RootDeviceEnvironment &rootDeviceEnvironment); - static void adjustPipelineSelect(CommandContainer &container, const NEO::KernelDescriptor &kernelDescriptor); + static size_t getSizeForComputeMode(); }; template diff --git a/shared/source/command_container/encode_compute_mode_tgllp_and_later.inl b/shared/source/command_container/encode_compute_mode_tgllp_and_later.inl index f674430bf2..ce1dbb3b07 100644 --- a/shared/source/command_container/encode_compute_mode_tgllp_and_later.inl +++ b/shared/source/command_container/encode_compute_mode_tgllp_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -33,6 +33,11 @@ size_t EncodeComputeMode::getCmdSizeForComputeMode(const RootDeviceEnvir return size; } +template +inline size_t EncodeComputeMode::getSizeForComputeMode() { + return sizeof(typename GfxFamily::STATE_COMPUTE_MODE); +} + template inline void EncodeComputeMode::programComputeModeCommandWithSynchronization( LinearStream &csr, StateComputeModeProperties &properties, const PipelineSelectArgs &args, diff --git a/shared/source/command_stream/CMakeLists.txt b/shared/source/command_stream/CMakeLists.txt index 72399d4ada..23bed9fb96 100644 --- a/shared/source/command_stream/CMakeLists.txt +++ b/shared/source/command_stream/CMakeLists.txt @@ -87,5 +87,11 @@ if(SUPPORT_DG2_AND_LATER) ) endif() +if(NOT SUPPORT_HEAPLESS) + list(APPEND NEO_CORE_COMMAND_STREAM + ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_heap_addressing.inl + ) +endif() + set_property(GLOBAL PROPERTY NEO_CORE_COMMAND_STREAM ${NEO_CORE_COMMAND_STREAM}) add_subdirectories() diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 696d798fda..cd80ae1e40 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -96,6 +96,11 @@ class CommandStreamReceiver { virtual CompletionStamp flushTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0; + + virtual CompletionStamp flushTaskStateless(LinearStream &commandStreamTask, size_t commandStreamTaskStart, + const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0; + virtual CompletionStamp flushBcsTask(LinearStream &commandStream, size_t commandStreamStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) = 0; virtual CompletionStamp flushImmediateTask(LinearStream &immediateCommandStream, size_t immediateCommandStreamStart, ImmediateDispatchFlags &dispatchFlags, Device &device) = 0; @@ -414,7 +419,7 @@ class CommandStreamReceiver { lastPreemptionMode = value; } - virtual SubmissionStatus initializeDeviceWithFirstSubmission() = 0; + virtual SubmissionStatus initializeDeviceWithFirstSubmission(Device &device) = 0; uint32_t getNumClients() const { return this->numClients.load(); diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index 04f31ad8a6..7c8498da61 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -59,6 +59,12 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override; + CompletionStamp flushTaskStateless(LinearStream &commandStream, size_t commandStreamStart, + const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override; + + void addPipeControlFlushTaskIfNeeded(LinearStream &commandStreamCSR, TaskCountType taskLevel); + CompletionStamp flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) override; CompletionStamp flushImmediateTask(LinearStream &immediateCommandStream, size_t immediateCommandStreamStart, @@ -79,6 +85,10 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { size_t getRequiredCmdStreamSizeAligned(const DispatchFlags &dispatchFlags, Device &device); size_t getRequiredCmdStreamSize(const DispatchBcsFlags &dispatchBcsFlags); size_t getRequiredCmdStreamSizeAligned(const DispatchBcsFlags &dispatchBcsFlags); + + size_t getRequiredCmdStreamHeaplessSize(const DispatchFlags &dispatchFlags, Device &device); + size_t getRequiredCmdStreamHeaplessSizeAligned(const DispatchFlags &dispatchFlags, Device &device); + size_t getRequiredCmdSizeForPreamble(Device &device) const; size_t getCmdSizeForPreemption(const DispatchFlags &dispatchFlags) const; size_t getCmdSizeForEpilogue(const DispatchFlags &dispatchFlags) const; @@ -164,7 +174,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { return getCmdSizeForStallingNoPostSyncCommands(); } void programStallingCommandsForBarrier(LinearStream &cmdStream, TimestampPacketContainer *barrierTimestampPacketNodes, const bool isDcFlushRequired) override; - SubmissionStatus initializeDeviceWithFirstSubmission() override; + SubmissionStatus initializeDeviceWithFirstSubmission(Device &device) override; HeapDirtyState &getDshState() { return dshState; @@ -179,6 +189,12 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { void dispatchRayTracingStateCommand(LinearStream &cmdStream, Device &device); uint64_t getScratchPatchAddress(); + SubmissionStatus programHeaplessProlog(Device &device); + void programHeaplessStateProlog(Device &device, LinearStream &commandStream); + void programStateBaseAddressHeapless(Device &device, LinearStream &commandStream); + void programComputeModeHeapless(Device &device, LinearStream &commandStream); + void handleAllocationsResidencyForflushTaskStateless(const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh); + protected: void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags); void programL3(LinearStream &csr, uint32_t &newL3Config); @@ -198,6 +214,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { void programEnginePrologue(LinearStream &csr); size_t getCmdSizeForPrologue() const; + size_t getCmdSizeForHeaplessPrologue(Device &device) const; + void handleAllocationsResidencyForHeaplessProlog(LinearStream &linearStream, Device &device); void setClearSlmWorkAroundParameter(PipeControlArgs &args); void addPipeControlBeforeStateSip(LinearStream &commandStream, Device &device); @@ -328,6 +346,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { size_t cmdStreamStart = 0; uint32_t latestSentBcsWaValue = std::numeric_limits::max(); + bool heaplessPrologueSent = false; }; } // namespace NEO diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index c76570f6e5..7e21b1cf68 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -181,6 +181,30 @@ size_t CommandStreamReceiverHw::getCmdsSizeForHardwareContext() const return getCmdSizeForPrologue(); } +template +void CommandStreamReceiverHw::addPipeControlFlushTaskIfNeeded(LinearStream &commandStreamCSR, TaskCountType taskLevel) { + + if (this->requiresInstructionCacheFlush) { + MemorySynchronizationCommands::addInstructionCacheFlush(commandStreamCSR); + this->requiresInstructionCacheFlush = false; + } + + // Add a Pipe Control if we have a dependency on a previous walker to avoid concurrency issues. + if (taskLevel > this->taskLevel) { + const auto programPipeControl = !timestampPacketWriteEnabled; + if (programPipeControl) { + PipeControlArgs args; + MemorySynchronizationCommands::addSingleBarrier(commandStreamCSR, args); + } + this->taskLevel = taskLevel; + DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskCount", peekTaskCount()); + } + + if (debugManager.flags.ForcePipeControlPriorToWalker.get()) { + forcePipeControl(commandStreamCSR); + } +} + template CompletionStamp CommandStreamReceiverHw::flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) { @@ -509,25 +533,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( MemorySynchronizationCommands::addStateCacheFlush(commandStreamCSR, device.getRootDeviceEnvironment()); } - if (requiresInstructionCacheFlush) { - MemorySynchronizationCommands::addInstructionCacheFlush(commandStreamCSR); - requiresInstructionCacheFlush = false; - } - - // Add a Pipe Control if we have a dependency on a previous walker to avoid concurrency issues. - if (taskLevel > this->taskLevel) { - const auto programPipeControl = !timestampPacketWriteEnabled; - if (programPipeControl) { - PipeControlArgs args; - MemorySynchronizationCommands::addSingleBarrier(commandStreamCSR, args); - } - this->taskLevel = taskLevel; - DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskCount", peekTaskCount()); - } - - if (debugManager.flags.ForcePipeControlPriorToWalker.get()) { - forcePipeControl(commandStreamCSR); - } + addPipeControlFlushTaskIfNeeded(commandStreamCSR, taskLevel); this->makeResident(*tagAllocation); @@ -1452,11 +1458,6 @@ size_t CommandStreamReceiverHw::getCmdSizeForComputeMode() { return EncodeComputeMode::getCmdSizeForComputeMode(this->peekRootDeviceEnvironment(), hasSharedHandles(), isRcs()); } -template -SubmissionStatus CommandStreamReceiverHw::initializeDeviceWithFirstSubmission() { - return flushTagUpdate(); -} - template void CommandStreamReceiverHw::handleFrontEndStateTransition(const DispatchFlags &dispatchFlags) { if (streamProperties.frontEndState.disableOverdispatch.value != -1) { @@ -2302,5 +2303,4 @@ inline void CommandStreamReceiverHw::chainCsrWorkToTask(LinearStream this->makeResident(*chainedBatchBuffer); EncodeNoop::alignToCacheLine(commandStreamCSR); } - } // namespace NEO diff --git a/shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl b/shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl new file mode 100644 index 0000000000..c116de057c --- /dev/null +++ b/shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_stream/command_stream_receiver_hw.h" + +namespace NEO { + +template +SubmissionStatus CommandStreamReceiverHw::initializeDeviceWithFirstSubmission(Device &device) { + return flushTagUpdate(); +} + +template +CompletionStamp CommandStreamReceiverHw::flushTaskStateless( + LinearStream &commandStream, size_t commandStreamStart, + const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) { + + UNRECOVERABLE_IF(true); + return {}; +} + +template +SubmissionStatus CommandStreamReceiverHw::programHeaplessProlog(Device &device) { + UNRECOVERABLE_IF(true); + return SubmissionStatus::unsupported; +} + +template +void CommandStreamReceiverHw::programStateBaseAddressHeapless(Device &device, LinearStream &commandStream) { + UNRECOVERABLE_IF(true); +} + +template +void CommandStreamReceiverHw::programComputeModeHeapless(Device &device, LinearStream &commandStream) { + UNRECOVERABLE_IF(true); +} + +template +void CommandStreamReceiverHw::programHeaplessStateProlog(Device &device, LinearStream &commandStream) { + UNRECOVERABLE_IF(true); +} + +template +size_t CommandStreamReceiverHw::getCmdSizeForHeaplessPrologue(Device &device) const { + UNRECOVERABLE_IF(true); + return 0; +} + +template +void CommandStreamReceiverHw::handleAllocationsResidencyForflushTaskStateless(const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh) { + UNRECOVERABLE_IF(true); +} + +template +void CommandStreamReceiverHw::handleAllocationsResidencyForHeaplessProlog(LinearStream &linearStream, Device &device) { + UNRECOVERABLE_IF(true); +} + +template +inline size_t CommandStreamReceiverHw::getRequiredCmdStreamHeaplessSize(const DispatchFlags &dispatchFlags, Device &device) { + UNRECOVERABLE_IF(true); + return 0u; +} + +template +inline size_t CommandStreamReceiverHw::getRequiredCmdStreamHeaplessSizeAligned(const DispatchFlags &dispatchFlags, Device &device) { + UNRECOVERABLE_IF(true); + return 0u; +} + +} // namespace NEO diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index fe8bb63438..40c96d1ce7 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -18,6 +18,7 @@ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/api_specific_config.h" +#include "shared/source/helpers/compiler_product_helper.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/ray_tracing_helper.h" #include "shared/source/memory_manager/allocation_properties.h" @@ -395,7 +396,9 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa const auto defaultEngineType = engineInstanced ? this->engineInstancedType : getChosenEngineType(hwInfo); const bool isDefaultEngine = defaultEngineType == engineType && engineUsage == EngineUsage::regular; const bool createAsEngineInstanced = engineInstanced && EngineHelpers::isCcs(engineType); - const bool isPrimaryEngine = gfxCoreHelper.areSecondaryContextsSupported() && EngineHelpers::isCcs(engineType) && engineUsage == EngineUsage::regular; + + const bool isPrimaryEngine = EngineHelpers::isCcs(engineType) && engineUsage == EngineUsage::regular; + const bool useContextGroup = isPrimaryEngine && gfxCoreHelper.areSecondaryContextsSupported(); UNRECOVERABLE_IF(EngineHelpers::isBcs(engineType) && !hwInfo.capabilityTable.blitterOperationsSupported); @@ -416,7 +419,7 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa EngineDescriptor engineDescriptor(engineTypeUsage, getDeviceBitfield(), preemptionMode, false, createAsEngineInstanced); auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(commandStreamReceiver.get(), engineDescriptor); - osContext->setContextGroup(isPrimaryEngine); + osContext->setContextGroup(useContextGroup); commandStreamReceiver->setupContext(*osContext); @@ -447,7 +450,7 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa if (osContext->isDebuggableContext() || this->isInitDeviceWithFirstSubmissionSupported(commandStreamReceiver->getType())) { - if (SubmissionStatus::success != commandStreamReceiver->initializeDeviceWithFirstSubmission()) { + if (SubmissionStatus::success != commandStreamReceiver->initializeDeviceWithFirstSubmission(*this)) { return false; } firstSubmissionDone = true; @@ -455,8 +458,12 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa } } - if (isPrimaryEngine && !firstSubmissionDone) { - commandStreamReceiver->initializeDeviceWithFirstSubmission(); + auto &compilerProductHelper = this->getCompilerProductHelper(); + bool isHeaplessStateInit = isPrimaryEngine && compilerProductHelper.isHeaplessStateInitEnabled(); + bool initializeDevice = (useContextGroup || isHeaplessStateInit) && !firstSubmissionDone; + + if (initializeDevice) { + commandStreamReceiver->initializeDeviceWithFirstSubmission(*this); } if (EngineHelpers::isBcs(engineType) && (defaultBcsEngineIndex == std::numeric_limits::max()) && (engineUsage == EngineUsage::regular)) { diff --git a/shared/source/execution_environment/execution_environment.cpp b/shared/source/execution_environment/execution_environment.cpp index 28690a1541..9074e3cb77 100644 --- a/shared/source/execution_environment/execution_environment.cpp +++ b/shared/source/execution_environment/execution_environment.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -93,6 +93,10 @@ void ExecutionEnvironment::calculateMaxOsContextCount() { auto ccsCount = hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled; bool hasRootCsr = subDevicesCount > 1; + if (debugManager.flags.ContextGroupSize.get() >= 1) { + MemoryManager::maxOsContextCount += debugManager.flags.ContextGroupSize.get(); + } + MemoryManager::maxOsContextCount += osContextCount * subDevicesCount + hasRootCsr; if (ccsCount > 1 && debugManager.flags.EngineInstancedSubDevices.get()) { diff --git a/shared/source/gen11/command_stream_receiver_hw_gen11.cpp b/shared/source/gen11/command_stream_receiver_hw_gen11.cpp index 24a932c1dd..d9114c386f 100644 --- a/shared/source/gen11/command_stream_receiver_hw_gen11.cpp +++ b/shared/source/gen11/command_stream_receiver_hw_gen11.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2023 Intel Corporation + * Copyright (C) 2019-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,6 +7,7 @@ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl" +#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl" #include "shared/source/command_stream/device_command_stream.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gen11/hw_cmds_base.h" diff --git a/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp b/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp index 41fc0cc7b9..6483f02ecf 100644 --- a/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp +++ b/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp @@ -11,6 +11,7 @@ using Family = NEO::Gen12LpFamily; #include "shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl" +#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl" #include "shared/source/command_stream/device_command_stream.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/blit_commands_helper_bdw_and_later.inl" diff --git a/shared/source/gen8/command_stream_receiver_hw_gen8.cpp b/shared/source/gen8/command_stream_receiver_hw_gen8.cpp index ad6cb8ada9..075ae27e6c 100644 --- a/shared/source/gen8/command_stream_receiver_hw_gen8.cpp +++ b/shared/source/gen8/command_stream_receiver_hw_gen8.cpp @@ -1,11 +1,12 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl" +#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl" #include "shared/source/command_stream/device_command_stream.h" #include "shared/source/gen8/hw_cmds_base.h" #include "shared/source/helpers/blit_commands_helper_bdw_and_later.inl" diff --git a/shared/source/gen9/command_stream_receiver_hw_gen9.cpp b/shared/source/gen9/command_stream_receiver_hw_gen9.cpp index 34d95056dd..7da909c5ba 100644 --- a/shared/source/gen9/command_stream_receiver_hw_gen9.cpp +++ b/shared/source/gen9/command_stream_receiver_hw_gen9.cpp @@ -1,11 +1,12 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl" +#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl" #include "shared/source/command_stream/device_command_stream.h" #include "shared/source/gen9/hw_cmds_base.h" #include "shared/source/helpers/blit_commands_helper_bdw_and_later.inl" diff --git a/shared/source/helpers/compiler_product_helper.h b/shared/source/helpers/compiler_product_helper.h index d301f4fc27..38ed078f26 100644 --- a/shared/source/helpers/compiler_product_helper.h +++ b/shared/source/helpers/compiler_product_helper.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -77,6 +77,7 @@ class CompilerProductHelper { virtual StackVec getDeviceOpenCLCVersions(const HardwareInfo &hwInfo, OclCVersion max) const = 0; virtual void adjustHwInfoForIgc(HardwareInfo &hwInfo) const = 0; virtual bool isHeaplessModeEnabled() const = 0; + virtual bool isHeaplessStateInitEnabled() const = 0; virtual ~CompilerProductHelper() = default; uint32_t getHwIpVersion(const HardwareInfo &hwInfo) const; @@ -119,6 +120,7 @@ class CompilerProductHelperHw : public CompilerProductHelper { StackVec getDeviceOpenCLCVersions(const HardwareInfo &hwInfo, OclCVersion max) const override; void adjustHwInfoForIgc(HardwareInfo &hwInfo) const override; bool isHeaplessModeEnabled() const override; + bool isHeaplessStateInitEnabled() const override; ~CompilerProductHelperHw() override = default; diff --git a/shared/source/helpers/compiler_product_helper_base.inl b/shared/source/helpers/compiler_product_helper_base.inl index 129cd99f1d..2e7ef8a1e9 100644 --- a/shared/source/helpers/compiler_product_helper_base.inl +++ b/shared/source/helpers/compiler_product_helper_base.inl @@ -232,6 +232,11 @@ bool CompilerProductHelperHw::isHeaplessModeEnabled() const { return false; } +template +bool CompilerProductHelperHw::isHeaplessStateInitEnabled() const { + return false; +} + template uint32_t CompilerProductHelperHw::matchRevisionIdWithProductConfig(HardwareIpVersion ipVersion, uint32_t revisionID) const { return ipVersion.value; diff --git a/shared/source/helpers/state_base_address.h b/shared/source/helpers/state_base_address.h index f3fdc74d43..e7d2c1b626 100644 --- a/shared/source/helpers/state_base_address.h +++ b/shared/source/helpers/state_base_address.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -79,5 +79,6 @@ struct StateBaseAddressHelper { static uint32_t getMaxBindlessSurfaceStates(); static void programHeaplessStateBaseAddress(STATE_BASE_ADDRESS &sba); + static size_t getSbaCmdSize(); }; } // namespace NEO diff --git a/shared/source/helpers/state_base_address_base.inl b/shared/source/helpers/state_base_address_base.inl index d256ef66f5..d9f2c89136 100644 --- a/shared/source/helpers/state_base_address_base.inl +++ b/shared/source/helpers/state_base_address_base.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2023 Intel Corporation + * Copyright (C) 2019-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -146,4 +146,9 @@ void StateBaseAddressHelper::programBindingTableBaseAddress(LinearStr StateBaseAddressHelper::programBindingTableBaseAddress(commandStream, ssh.getHeapGpuBase(), ssh.getHeapSizeInPages(), gmmHelper); } +template +inline size_t StateBaseAddressHelper::getSbaCmdSize() { + return sizeof(typename GfxFamily::STATE_BASE_ADDRESS); +} + } // namespace NEO diff --git a/shared/source/xe_hpc_core/command_stream_receiver_hw_xe_hpc_core.cpp b/shared/source/xe_hpc_core/command_stream_receiver_hw_xe_hpc_core.cpp index dd367e0443..6b0011b337 100644 --- a/shared/source/xe_hpc_core/command_stream_receiver_hw_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/command_stream_receiver_hw_xe_hpc_core.cpp @@ -13,6 +13,7 @@ using Family = NEO::XeHpcCoreFamily; #include "shared/source/command_stream/command_stream_receiver_hw_dg2_and_later.inl" +#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl" #include "shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/helpers/blit_commands_helper_xehp_and_later.inl" diff --git a/shared/source/xe_hpg_core/command_stream_receiver_hw_xe_hpg_core.cpp b/shared/source/xe_hpg_core/command_stream_receiver_hw_xe_hpg_core.cpp index 97a3dae55c..44e1073029 100644 --- a/shared/source/xe_hpg_core/command_stream_receiver_hw_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/command_stream_receiver_hw_xe_hpg_core.cpp @@ -12,6 +12,7 @@ using Family = NEO::XeHpgCoreFamily; #include "shared/source/command_stream/command_stream_receiver_hw_dg2_and_later.inl" +#include "shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl" #include "shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl" #include "shared/source/helpers/blit_commands_helper_xehp_and_later.inl" #include "shared/source/helpers/populate_factory.h" diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index bf18cb2620..3a70a9772e 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -55,9 +55,11 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::dcFlushSupport; using BaseClass::directSubmission; using BaseClass::dshState; + using BaseClass::getCmdSizeForHeaplessPrologue; using BaseClass::getCmdSizeForPrologue; using BaseClass::getScratchPatchAddress; using BaseClass::getScratchSpaceController; + using BaseClass::handleAllocationsResidencyForHeaplessProlog; using BaseClass::handleFrontEndStateTransition; using BaseClass::handlePipelineSelectStateTransition; using BaseClass::handleStateBaseAddressStateTransition; @@ -68,6 +70,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::isDirectSubmissionEnabled; using BaseClass::isPerDssBackedBufferSent; using BaseClass::makeResident; + using BaseClass::pageTableManagerInitialized; using BaseClass::perDssBackedBuffer; using BaseClass::postInitFlagsSetup; using BaseClass::programActivePartitionConfig; @@ -84,6 +87,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::sshState; using BaseClass::staticWorkPartitioningEnabled; using BaseClass::streamProperties; + using BaseClass::wasSubmittedToSingleSubdevice; using BaseClass::CommandStreamReceiver::activePartitions; using BaseClass::CommandStreamReceiver::activePartitionsConfig; @@ -97,6 +101,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::CommandStreamReceiver::commandStream; using BaseClass::CommandStreamReceiver::debugConfirmationFunction; using BaseClass::CommandStreamReceiver::debugPauseStateAddress; + using BaseClass::CommandStreamReceiver::debugSurface; using BaseClass::CommandStreamReceiver::deviceBitfield; using BaseClass::CommandStreamReceiver::dispatchMode; using BaseClass::CommandStreamReceiver::doubleSbaWa; @@ -134,6 +139,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::CommandStreamReceiver::ownershipMutex; using BaseClass::CommandStreamReceiver::perfCounterAllocator; using BaseClass::CommandStreamReceiver::pipelineSupportFlags; + using BaseClass::CommandStreamReceiver::preemptionAllocation; using BaseClass::CommandStreamReceiver::profilingTimeStampAllocator; using BaseClass::CommandStreamReceiver::requestedPreallocationsAmount; using BaseClass::CommandStreamReceiver::requiredScratchSlot0Size; @@ -145,6 +151,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::CommandStreamReceiver::stateComputeModeDirty; using BaseClass::CommandStreamReceiver::submissionAggregator; using BaseClass::CommandStreamReceiver::tagAddress; + using BaseClass::CommandStreamReceiver::tagAllocation; using BaseClass::CommandStreamReceiver::taskCount; using BaseClass::CommandStreamReceiver::taskLevel; using BaseClass::CommandStreamReceiver::timestampPacketAllocator; diff --git a/shared/test/common/mocks/mock_command_stream_receiver.cpp b/shared/test/common/mocks/mock_command_stream_receiver.cpp index b878c2f9b8..b3f8425572 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.cpp +++ b/shared/test/common/mocks/mock_command_stream_receiver.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -28,6 +28,20 @@ CompletionStamp MockCommandStreamReceiver::flushTask( return stamp; } +CompletionStamp MockCommandStreamReceiver::flushTaskStateless( + LinearStream &commandStream, + size_t commandStreamStart, + const IndirectHeap *dsh, + const IndirectHeap *ioh, + const IndirectHeap *ssh, + TaskCountType taskLevel, + DispatchFlags &dispatchFlags, + Device &device) { + ++taskCount; + CompletionStamp stamp = {taskCount, taskLevel, flushStamp->peekStamp()}; + return stamp; +} + CompletionStamp MockCommandStreamReceiver::flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) { ++taskCount; diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index ceab2fe496..4802fa41f9 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -119,6 +119,16 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { DispatchFlags &dispatchFlags, Device &device) override; + CompletionStamp flushTaskStateless( + LinearStream &commandStream, + size_t commandStreamStart, + const IndirectHeap *dsh, + const IndirectHeap *ioh, + const IndirectHeap *ssh, + TaskCountType taskLevel, + DispatchFlags &dispatchFlags, + Device &device) override; + CompletionStamp flushImmediateTask( LinearStream &immediateCommandStream, size_t immediateCommandStreamStart, @@ -207,7 +217,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { } return isLocked; } - SubmissionStatus initializeDeviceWithFirstSubmission() override { return SubmissionStatus::success; } + SubmissionStatus initializeDeviceWithFirstSubmission(Device &device) override { return SubmissionStatus::success; } static constexpr size_t tagSize = 256; static volatile TagAddressType mockTagAddress[tagSize]; diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index fe35508056..30eb034fa4 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -3028,10 +3028,10 @@ HWTEST_F(CommandStreamReceiverHwTest, givenOutOfMemoryFailureOnFlushWhenInitiali commandStreamReceiver.flushReturnValue = SubmissionStatus::outOfMemory; - EXPECT_EQ(SubmissionStatus::outOfMemory, commandStreamReceiver.initializeDeviceWithFirstSubmission()); + EXPECT_EQ(SubmissionStatus::outOfMemory, commandStreamReceiver.initializeDeviceWithFirstSubmission(*pDevice)); commandStreamReceiver.flushReturnValue = SubmissionStatus::outOfHostMemory; - EXPECT_EQ(SubmissionStatus::outOfHostMemory, commandStreamReceiver.initializeDeviceWithFirstSubmission()); + EXPECT_EQ(SubmissionStatus::outOfHostMemory, commandStreamReceiver.initializeDeviceWithFirstSubmission(*pDevice)); } HWTEST_F(CommandStreamReceiverHwTest, whenFlushTagUpdateThenSetStallingCmdsFlag) { @@ -5022,3 +5022,53 @@ HWTEST2_F(CommandStreamReceiverHwTest, givenImplicitScalingEnabledWhenProgrammin EXPECT_EQ(estimatedCmdSize, offset); } + +HWTEST_F(CommandStreamReceiverHwTest, givenForcePipeControlPriorToWalkerWhenAddPipeControlFlushTaskIfNeededThenStallingPcIsProgrammed) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + DebugManagerStateRestore dbgRestorer; + debugManager.flags.ForcePipeControlPriorToWalker.set(1); + + auto &csr = pDevice->getUltCommandStreamReceiver(); + + csr.addPipeControlFlushTaskIfNeeded(commandStream, 0); + + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, + commandStream.getCpuBase(), + commandStream.getUsed()); + + auto itorCmd = find(commands.begin(), commands.end()); + ASSERT_NE(commands.end(), itorCmd); + + auto pc = genCmdCast(*itorCmd); + EXPECT_TRUE(pc->getCommandStreamerStallEnable()); +} + +HWTEST_F(CommandStreamReceiverTest, givenBcsCsrWhenInitializeDeviceWithFirstSubmissionIsCalledThenSuccessIsReturned) { + MockOsContext mockOsContext(0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::EngineType::ENGINE_BCS, EngineUsage::regular})); + MockCsrHw commandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + commandStreamReceiver.setupContext(mockOsContext); + commandStreamReceiver.initializeTagAllocation(); + + EXPECT_EQ(SubmissionStatus::success, commandStreamReceiver.initializeDeviceWithFirstSubmission(*pDevice)); +} + +using CommandStreamReceiverHwHeaplessTest = Test; + +HWTEST_F(CommandStreamReceiverHwHeaplessTest, whenHeaplessCommandStreamReceiverFunctionsAreCalledThenExceptionIsThrown) { + std::unique_ptr> csr = std::make_unique>(*pDevice->executionEnvironment, rootDeviceIndex, pDevice->getDeviceBitfield()); + + LinearStream commandStream(0, 0); + + EXPECT_ANY_THROW(csr->flushTaskStateless(commandStream, 0, nullptr, nullptr, nullptr, 0, csr->recordedDispatchFlags, *pDevice)); + EXPECT_ANY_THROW(csr->programHeaplessProlog(*pDevice)); + EXPECT_ANY_THROW(csr->programStateBaseAddressHeapless(*pDevice, commandStream)); + EXPECT_ANY_THROW(csr->programComputeModeHeapless(*pDevice, commandStream)); + EXPECT_ANY_THROW(csr->getCmdSizeForHeaplessPrologue(*pDevice)); + EXPECT_ANY_THROW(csr->handleAllocationsResidencyForHeaplessProlog(commandStream, *pDevice)); + EXPECT_ANY_THROW(csr->programHeaplessStateProlog(*pDevice, commandStream)); + EXPECT_ANY_THROW(csr->handleAllocationsResidencyForflushTaskStateless(nullptr, nullptr, nullptr)); + EXPECT_ANY_THROW(csr->getRequiredCmdStreamHeaplessSize(csr->recordedDispatchFlags, *pDevice)); + EXPECT_ANY_THROW(csr->getRequiredCmdStreamHeaplessSizeAligned(csr->recordedDispatchFlags, *pDevice)); +} diff --git a/shared/test/unit_test/device/neo_device_tests.cpp b/shared/test/unit_test/device/neo_device_tests.cpp index dc7f07826b..d24c912f0f 100644 --- a/shared/test/unit_test/device/neo_device_tests.cpp +++ b/shared/test/unit_test/device/neo_device_tests.cpp @@ -1097,8 +1097,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenCCSEngineAndContextGroupSizeEnabl UltDeviceFactory deviceFactory{1, 0, executionEnvironment}; - MemoryManager::maxOsContextCount++; - deviceFactory.rootDevices[0]->createEngine(0, {aub_stream::EngineType::ENGINE_CCS, EngineUsage::regular}); auto defaultEngine = deviceFactory.rootDevices[0]->getDefaultEngine();