/* * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/aub_subcapture_status.h" #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/direct_submission/direct_submission_hw.h" #include "shared/source/helpers/blit_properties.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include "shared/test/common/helpers/ult_hw_config.h" #include #include namespace NEO { class GmmPageTableMngr; template class UltCommandStreamReceiver : public CommandStreamReceiverHw, public NonCopyableOrMovableClass { using BaseClass = CommandStreamReceiverHw; public: using BaseClass::addPipeControlBefore3dState; using BaseClass::bcsRelaxedOrderingAllowed; using BaseClass::blitterDirectSubmission; using BaseClass::checkPlatformSupportsGpuIdleImplicitFlush; using BaseClass::checkPlatformSupportsNewResourceImplicitFlush; using BaseClass::createKernelArgsBufferAllocation; using BaseClass::csrSizeRequestFlags; using BaseClass::directSubmission; using BaseClass::dshState; using BaseClass::getCmdSizeForPrologue; using BaseClass::getScratchPatchAddress; using BaseClass::getScratchSpaceController; using BaseClass::handleFrontEndStateTransition; using BaseClass::handlePipelineSelectStateTransition; using BaseClass::handleStateBaseAddressStateTransition; using BaseClass::heapStorageRequiresRecyclingTag; using BaseClass::indirectHeap; using BaseClass::iohState; using BaseClass::isBlitterDirectSubmissionEnabled; using BaseClass::isDirectSubmissionEnabled; using BaseClass::isPerDssBackedBufferSent; using BaseClass::kernelArgsBufferAllocation; using BaseClass::logicalStateHelper; using BaseClass::makeResident; using BaseClass::perDssBackedBuffer; using BaseClass::postInitFlagsSetup; using BaseClass::programActivePartitionConfig; using BaseClass::programEnginePrologue; using BaseClass::programPerDssBackedBuffer; using BaseClass::programPreamble; using BaseClass::programStallingCommandsForBarrier; using BaseClass::programStallingNoPostSyncCommandsForBarrier; using BaseClass::programStallingPostSyncCommandsForBarrier; using BaseClass::programStateSip; using BaseClass::programVFEState; using BaseClass::requiresInstructionCacheFlush; using BaseClass::rootDeviceIndex; using BaseClass::sshState; using BaseClass::staticWorkPartitioningEnabled; using BaseClass::streamProperties; using BaseClass::wasSubmittedToSingleSubdevice; using BaseClass::CommandStreamReceiver::activePartitions; using BaseClass::CommandStreamReceiver::activePartitionsConfig; using BaseClass::CommandStreamReceiver::baseWaitFunction; using BaseClass::CommandStreamReceiver::bindingTableBaseAddressRequired; using BaseClass::CommandStreamReceiver::canUse4GbHeaps; using BaseClass::CommandStreamReceiver::checkForNewResources; using BaseClass::CommandStreamReceiver::checkImplicitFlushForGpuIdle; using BaseClass::CommandStreamReceiver::cleanupResources; using BaseClass::CommandStreamReceiver::clearColorAllocation; using BaseClass::CommandStreamReceiver::commandStream; using BaseClass::CommandStreamReceiver::debugConfirmationFunction; using BaseClass::CommandStreamReceiver::debugPauseStateAddress; using BaseClass::CommandStreamReceiver::deviceBitfield; using BaseClass::CommandStreamReceiver::dispatchMode; using BaseClass::CommandStreamReceiver::doubleSbaWa; using BaseClass::CommandStreamReceiver::downloadAllocationImpl; using BaseClass::CommandStreamReceiver::executionEnvironment; using BaseClass::CommandStreamReceiver::experimentalCmdBuffer; using BaseClass::CommandStreamReceiver::feSupportFlags; using BaseClass::CommandStreamReceiver::flushStamp; using BaseClass::CommandStreamReceiver::globalFenceAllocation; using BaseClass::CommandStreamReceiver::gpuHangCheckPeriod; using BaseClass::CommandStreamReceiver::gsbaFor32BitProgrammed; using BaseClass::CommandStreamReceiver::immWritePostSyncWriteOffset; using BaseClass::CommandStreamReceiver::initDirectSubmission; using BaseClass::CommandStreamReceiver::internalAllocationStorage; using BaseClass::CommandStreamReceiver::isBlitterDirectSubmissionEnabled; using BaseClass::CommandStreamReceiver::isDirectSubmissionEnabled; using BaseClass::CommandStreamReceiver::isEnginePrologueSent; using BaseClass::CommandStreamReceiver::isPreambleSent; using BaseClass::CommandStreamReceiver::isStateSipSent; using BaseClass::CommandStreamReceiver::lastAdditionalKernelExecInfo; using BaseClass::CommandStreamReceiver::lastKernelExecutionType; using BaseClass::CommandStreamReceiver::lastMediaSamplerConfig; using BaseClass::CommandStreamReceiver::lastMemoryCompressionState; using BaseClass::CommandStreamReceiver::lastPreemptionMode; using BaseClass::CommandStreamReceiver::lastSentL3Config; using BaseClass::CommandStreamReceiver::lastSentUseGlobalAtomics; using BaseClass::CommandStreamReceiver::lastSystolicPipelineSelectMode; using BaseClass::CommandStreamReceiver::lastVmeSubslicesConfig; using BaseClass::CommandStreamReceiver::latestFlushedTaskCount; using BaseClass::CommandStreamReceiver::latestSentStatelessMocsConfig; using BaseClass::CommandStreamReceiver::latestSentTaskCount; using BaseClass::CommandStreamReceiver::mediaVfeStateDirty; using BaseClass::CommandStreamReceiver::newResources; using BaseClass::CommandStreamReceiver::osContext; using BaseClass::CommandStreamReceiver::ownershipMutex; using BaseClass::CommandStreamReceiver::perfCounterAllocator; using BaseClass::CommandStreamReceiver::pipelineSupportFlags; using BaseClass::CommandStreamReceiver::profilingTimeStampAllocator; using BaseClass::CommandStreamReceiver::requiredPrivateScratchSize; using BaseClass::CommandStreamReceiver::requiredScratchSize; using BaseClass::CommandStreamReceiver::resourcesInitialized; using BaseClass::CommandStreamReceiver::samplerCacheFlushRequired; using BaseClass::CommandStreamReceiver::sbaSupportFlags; using BaseClass::CommandStreamReceiver::scratchSpaceController; using BaseClass::CommandStreamReceiver::stateComputeModeDirty; using BaseClass::CommandStreamReceiver::submissionAggregator; using BaseClass::CommandStreamReceiver::tagAddress; using BaseClass::CommandStreamReceiver::taskCount; using BaseClass::CommandStreamReceiver::taskLevel; using BaseClass::CommandStreamReceiver::timestampPacketAllocator; using BaseClass::CommandStreamReceiver::timestampPacketWriteEnabled; using BaseClass::CommandStreamReceiver::timeStampPostSyncWriteOffset; using BaseClass::CommandStreamReceiver::useGpuIdleImplicitFlush; using BaseClass::CommandStreamReceiver::useNewResourceImplicitFlush; using BaseClass::CommandStreamReceiver::useNotifyEnableForPostSync; using BaseClass::CommandStreamReceiver::userPauseConfirmation; using BaseClass::CommandStreamReceiver::waitForTaskCountAndCleanAllocationList; using BaseClass::CommandStreamReceiver::workPartitionAllocation; UltCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : BaseClass(executionEnvironment, rootDeviceIndex, deviceBitfield), recursiveLockCounter(0), recordedDispatchFlags(DispatchFlagsHelper::createDefaultDispatchFlags()) { this->downloadAllocationImpl = [this](GraphicsAllocation &graphicsAllocation) { this->downloadAllocationUlt(graphicsAllocation); }; } ~UltCommandStreamReceiver() override { this->downloadAllocationImpl = nullptr; } static CommandStreamReceiver *create(bool withAubDump, ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) { return new UltCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield); } GmmPageTableMngr *createPageTableManager() override { createPageTableManagerCalled = true; return nullptr; } void makeSurfacePackNonResident(ResidencyContainer &allocationsForResidency, bool clearAllocations) override { makeSurfacePackNonResidentCalled++; BaseClass::makeSurfacePackNonResident(allocationsForResidency, clearAllocations); } NEO::SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { if (flushReturnValue) { return *flushReturnValue; } if (recordFlusheBatchBuffer) { latestFlushedBatchBuffer = batchBuffer; } latestSentTaskCountValueDuringFlush = latestSentTaskCount; return BaseClass::flush(batchBuffer, allocationsForResidency); } CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { recordedDispatchFlags = dispatchFlags; this->lastFlushedCommandStream = &commandStream; return BaseClass::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); } size_t getPreferredTagPoolSize() const override { return BaseClass::getPreferredTagPoolSize() + 1; } void setPreemptionAllocation(GraphicsAllocation *allocation) { this->preemptionAllocation = allocation; } void downloadAllocations() override { downloadAllocationCalled = true; downloadAllocationsCalled = true; downloadAllocationsCalledCount++; } void downloadAllocationUlt(GraphicsAllocation &gfxAllocation) { downloadAllocationCalled = true; } WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, TaskCountType taskCountToWait) override { latestWaitForCompletionWithTimeoutTaskCount.store(taskCountToWait); latestWaitForCompletionWithTimeoutWaitParams = params; waitForCompletionWithTimeoutTaskCountCalled++; if (callBaseWaitForCompletionWithTimeout) { return BaseClass::waitForCompletionWithTimeout(params, taskCountToWait); } return returnWaitForCompletionWithTimeout; } void fillReusableAllocationsList() override { fillReusableAllocationsListCalled++; if (callBaseFillReusableAllocationsList) { return BaseClass::fillReusableAllocationsList(); } } WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, TaskCountType taskCountToWait) { return waitForCompletionWithTimeout(WaitParams{false, enableTimeout, timeoutMicroseconds}, taskCountToWait); } WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override { if (waitForTaskCountWithKmdNotifyFallbackReturnValue.has_value()) { return *waitForTaskCountWithKmdNotifyFallbackReturnValue; } return BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle); } void overrideCsrSizeReqFlags(CsrSizeRequestFlags &flags) { this->csrSizeRequestFlags = flags; } GraphicsAllocation *getPreemptionAllocation() const { return this->preemptionAllocation; } void makeResident(GraphicsAllocation &gfxAllocation) override { if (storeMakeResidentAllocations) { std::map::iterator it = makeResidentAllocations.find(&gfxAllocation); if (it == makeResidentAllocations.end()) { std::pair::iterator, bool> result; result = makeResidentAllocations.insert(std::pair(&gfxAllocation, 1)); DEBUG_BREAK_IF(!result.second); } else { makeResidentAllocations[&gfxAllocation]++; } } BaseClass::makeResident(gfxAllocation); } bool isMadeResident(GraphicsAllocation *graphicsAllocation) const { return makeResidentAllocations.find(graphicsAllocation) != makeResidentAllocations.end(); } bool isMadeResident(GraphicsAllocation *graphicsAllocation, TaskCountType taskCount) const { auto it = makeResidentAllocations.find(graphicsAllocation); if (it == makeResidentAllocations.end()) { return false; } return (it->first->getTaskCount(osContext->getContextId()) == taskCount); } bool isMadeResident(GraphicsAllocation *graphicsAllocation, uint32_t residentCount) const { auto it = makeResidentAllocations.find(graphicsAllocation); if (it == makeResidentAllocations.end()) { return false; } return it->second == residentCount; } std::map makeResidentAllocations; bool storeMakeResidentAllocations = false; AubSubCaptureStatus checkAndActivateAubSubCapture(const std::string &kernelName) override { auto status = CommandStreamReceiverHw::checkAndActivateAubSubCapture(kernelName); checkAndActivateAubSubCaptureCalled = true; return status; } void addAubComment(const char *message) override { CommandStreamReceiverHw::addAubComment(message); aubCommentMessages.push_back(message); addAubCommentCalled = true; } bool flushBatchedSubmissions() override { flushBatchedSubmissionsCalled = true; if (shouldFailFlushBatchedSubmissions) { return false; } if (shouldFlushBatchedSubmissionsReturnSuccess) { return true; } return CommandStreamReceiverHw::flushBatchedSubmissions(); } SubmissionStatus flushTagUpdate() override { flushTagUpdateCalled = true; return CommandStreamReceiverHw::flushTagUpdate(); } void initProgrammingFlags() override { CommandStreamReceiverHw::initProgrammingFlags(); initProgrammingFlagsCalled = true; } std::unique_lock obtainUniqueOwnership() override { recursiveLockCounter++; return CommandStreamReceiverHw::obtainUniqueOwnership(); } TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { blitBufferCalled++; receivedBlitProperties = blitPropertiesContainer; if (callBaseFlushBcsTask) { return CommandStreamReceiverHw::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device); } else { return flushBcsTaskReturnValue; } } bool createPerDssBackedBuffer(Device &device) override { createPerDssBackedBufferCalled++; return BaseClass::createPerDssBackedBuffer(device); } bool isMultiOsContextCapable() const override { if (callBaseIsMultiOsContextCapable) { return BaseClass::isMultiOsContextCapable(); } return multiOsContextCapable; } bool initDirectSubmission() override { if (ultHwConfig.csrFailInitDirectSubmission) { return false; } initDirectSubmissionCalled++; return BaseClass::CommandStreamReceiver::initDirectSubmission(); } bool isDirectSubmissionEnabled() const override { if (ultHwConfig.csrBaseCallDirectSubmissionAvailable) { return BaseClass::isDirectSubmissionEnabled(); } if (ultHwConfig.csrSuperBaseCallDirectSubmissionAvailable) { return BaseClass::CommandStreamReceiver::isDirectSubmissionEnabled(); } return directSubmissionAvailable; } bool isBlitterDirectSubmissionEnabled() const override { if (ultHwConfig.csrBaseCallBlitterDirectSubmissionAvailable) { return BaseClass::isBlitterDirectSubmissionEnabled(); } if (ultHwConfig.csrSuperBaseCallBlitterDirectSubmissionAvailable) { return BaseClass::CommandStreamReceiver::isBlitterDirectSubmissionEnabled(); } return blitterDirectSubmissionAvailable; } bool isKmdWaitOnTaskCountAllowed() const override { if (callBaseIsKmdWaitOnTaskCountAllowed) { return BaseClass::isKmdWaitOnTaskCountAllowed(); } return isKmdWaitOnTaskCountAllowedValue; } bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override { createAllocationForHostSurfaceCalled++; cpuCopyForHostPtrSurfaceAllowed = surface.peekIsPtrCopyAllowed(); return BaseClass::createAllocationForHostSurface(surface, requiresL3Flush); } void ensureCommandBufferAllocation(LinearStream &commandStream, size_t minimumRequiredSize, size_t additionalAllocationSize) override { ensureCommandBufferAllocationCalled++; BaseClass::ensureCommandBufferAllocation(commandStream, minimumRequiredSize, additionalAllocationSize); } CommandStreamReceiverType getType() const override { return commandStreamReceiverType; } void pollForCompletion() override { pollForCompletionCalled++; } bool checkGpuHangDetected(CommandStreamReceiver::TimeType currentTime, CommandStreamReceiver::TimeType &lastHangCheckTime) const override { checkGpuHangDetectedCalled++; if (forceReturnGpuHang) { return true; } return BaseClass::checkGpuHangDetected(currentTime, lastHangCheckTime); } SubmissionStatus sendRenderStateCacheFlush() override { if (callBaseSendRenderStateCacheFlush) { return BaseClass::sendRenderStateCacheFlush(); } return *flushReturnValue; } void stopDirectSubmission() override { stopDirectSubmissionCalled = true; BaseClass::stopDirectSubmission(); } std::vector aubCommentMessages; BatchBuffer latestFlushedBatchBuffer = {}; std::atomic latestWaitForCompletionWithTimeoutTaskCount{0}; TaskCountType latestSentTaskCountValueDuringFlush = 0; WaitParams latestWaitForCompletionWithTimeoutWaitParams{0}; TaskCountType flushBcsTaskReturnValue{}; LinearStream *lastFlushedCommandStream = nullptr; std::atomic recursiveLockCounter; std::atomic waitForCompletionWithTimeoutTaskCountCalled{0}; uint32_t makeSurfacePackNonResidentCalled = false; uint32_t blitBufferCalled = 0; uint32_t createPerDssBackedBufferCalled = 0; uint32_t initDirectSubmissionCalled = 0; uint32_t fillReusableAllocationsListCalled = 0; uint32_t pollForCompletionCalled = 0; mutable uint32_t checkGpuHangDetectedCalled = 0; int ensureCommandBufferAllocationCalled = 0; DispatchFlags recordedDispatchFlags; BlitPropertiesContainer receivedBlitProperties = {}; uint32_t createAllocationForHostSurfaceCalled = 0; WaitStatus returnWaitForCompletionWithTimeout = WaitStatus::Ready; std::optional waitForTaskCountWithKmdNotifyFallbackReturnValue{}; std::optional flushReturnValue{}; CommandStreamReceiverType commandStreamReceiverType = CommandStreamReceiverType::CSR_HW; uint32_t downloadAllocationsCalledCount = 0; bool cpuCopyForHostPtrSurfaceAllowed = false; bool createPageTableManagerCalled = false; bool recordFlusheBatchBuffer = false; bool checkAndActivateAubSubCaptureCalled = false; bool addAubCommentCalled = false; std::atomic_bool downloadAllocationCalled = false; std::atomic_bool downloadAllocationsCalled = false; bool flushBatchedSubmissionsCalled = false; bool flushTagUpdateCalled = false; bool initProgrammingFlagsCalled = false; bool multiOsContextCapable = false; bool memoryCompressionEnabled = false; bool directSubmissionAvailable = false; bool blitterDirectSubmissionAvailable = false; bool callBaseIsMultiOsContextCapable = false; bool callBaseWaitForCompletionWithTimeout = true; bool shouldFailFlushBatchedSubmissions = false; bool shouldFlushBatchedSubmissionsReturnSuccess = false; bool callBaseFillReusableAllocationsList = false; bool callBaseFlushBcsTask{true}; bool callBaseSendRenderStateCacheFlush = true; bool forceReturnGpuHang = false; bool callBaseIsKmdWaitOnTaskCountAllowed = false; bool isKmdWaitOnTaskCountAllowedValue = false; bool stopDirectSubmissionCalled = false; }; } // namespace NEO