/* * Copyright (C) 2020-2024 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/submission_status.h" #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/surface.h" #include "shared/source/utilities/tag_allocator.h" #include "shared/test/common/helpers/dispatch_flags_helper.h" #include #include namespace NEO { struct CommandBuffer; } using namespace NEO; class MockCommandStreamReceiver : public CommandStreamReceiver { public: using CommandStreamReceiver::activePartitions; using CommandStreamReceiver::baseWaitFunction; using CommandStreamReceiver::checkForNewResources; using CommandStreamReceiver::checkImplicitFlushForGpuIdle; using CommandStreamReceiver::cleanupResources; using CommandStreamReceiver::CommandStreamReceiver; using CommandStreamReceiver::globalFenceAllocation; using CommandStreamReceiver::gpuHangCheckPeriod; using CommandStreamReceiver::immWritePostSyncWriteOffset; using CommandStreamReceiver::internalAllocationStorage; using CommandStreamReceiver::latestFlushedTaskCount; using CommandStreamReceiver::latestSentTaskCount; using CommandStreamReceiver::localMemoryEnabled; using CommandStreamReceiver::newResources; using CommandStreamReceiver::numClients; using CommandStreamReceiver::osContext; using CommandStreamReceiver::ownershipMutex; using CommandStreamReceiver::preemptionAllocation; using CommandStreamReceiver::requiresInstructionCacheFlush; using CommandStreamReceiver::tagAddress; using CommandStreamReceiver::tagsMultiAllocation; using CommandStreamReceiver::taskCount; using CommandStreamReceiver::timestampPacketAllocator; using CommandStreamReceiver::timeStampPostSyncWriteOffset; using CommandStreamReceiver::useGpuIdleImplicitFlush; using CommandStreamReceiver::useNewResourceImplicitFlush; MockCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : CommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) { CommandStreamReceiver::tagAddress = &mockTagAddress[0]; memset(const_cast(CommandStreamReceiver::tagAddress), 0xFFFFFFFF, tagSize * sizeof(TagAddressType)); gpuHangCheckPeriod = {}; } WaitStatus waitForCompletionWithTimeout(const WaitParams ¶ms, TaskCountType taskCountToWait) override { waitForCompletionWithTimeoutCalled++; return waitForCompletionWithTimeoutReturnValue; } void fillReusableAllocationsList() override { fillReusableAllocationsListCalled++; } SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override; SubmissionStatus flushTagUpdate() override { return SubmissionStatus::success; }; void updateTagFromWait() override{}; bool submitDependencyUpdate(TagNodeBase *tag) override { submitDependencyUpdateCalledTimes++; return submitDependencyUpdateReturnValue; } bool isUpdateTagFromWaitEnabled() override { return false; }; void writeMemoryAub(aub_stream::AllocationParams &allocationParams) override { writeMemoryAubCalled++; } bool isMultiOsContextCapable() const override { return multiOsContextCapable; } bool isGpuHangDetected() const override { if (isGpuHangDetectedReturnValue.has_value()) { return *isGpuHangDetectedReturnValue; } else { return CommandStreamReceiver::isGpuHangDetected(); } } bool testTaskCountReady(volatile TagAddressType *pollAddress, TaskCountType taskCountToWait) override { if (testTaskCountReadyReturnValue.has_value()) { return *testTaskCountReadyReturnValue; } else { return CommandStreamReceiver::testTaskCountReady(pollAddress, taskCountToWait); } } MemoryCompressionState getMemoryCompressionState(bool auxTranslationRequired) const override { return MemoryCompressionState::notApplicable; }; TagAllocatorBase *getTimestampPacketAllocator() override { return nullptr; } std::unique_ptr createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) override { return std::unique_ptr(nullptr); } CompletionStamp flushTask( LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override; CompletionStamp flushTaskStateless( LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override; CompletionStamp flushImmediateTask( LinearStream &immediateCommandStream, size_t immediateCommandStreamStart, ImmediateDispatchFlags &dispatchFlags, Device &device) override; CompletionStamp flushImmediateTaskStateless( LinearStream &immediateCommandStream, size_t immediateCommandStreamStart, ImmediateDispatchFlags &dispatchFlags, Device &device) override; CompletionStamp flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) override; SubmissionStatus sendRenderStateCacheFlush() override { return SubmissionStatus::success; } bool flushBatchedSubmissions() override { if (flushBatchedSubmissionsCallCounter) { (*flushBatchedSubmissionsCallCounter)++; } return true; } WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, QueueThrottle throttle) override { return WaitStatus::ready; } WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) { return WaitStatus::ready; } TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; }; CommandStreamReceiverType getType() const override { return commandStreamReceiverType; } void downloadAllocations() override { downloadAllocationsCalledCount++; } void programHardwareContext(LinearStream &cmdStream) override { programHardwareContextCalled = true; } size_t getCmdsSizeForHardwareContext() const override { return 0; } void programComputeBarrierCommand(LinearStream &cmdStream) override { programComputeBarrierCommandCalled = true; } size_t getCmdsSizeForComputeBarrierCommand() const override { return 0; } void programStallingCommandsForBarrier(LinearStream &cmdStream, TimestampPacketContainer *barrierTimestampPacketNodes, const bool isDcFlushRequired) override { programStallingCommandsForBarrierCalled = true; } void stopDirectSubmission(bool blocking) override { this->blockingStopDirectSubmissionCalled = blocking; } bool createPreemptionAllocation() override { if (createPreemptionAllocationParentCall) { return CommandStreamReceiver::createPreemptionAllocation(); } return createPreemptionAllocationReturn; } GraphicsAllocation *getClearColorAllocation() override { return nullptr; } void makeResident(GraphicsAllocation &gfxAllocation) override { makeResidentCalledTimes++; if (makeResidentParentCall) { return CommandStreamReceiver::makeResident(gfxAllocation); } } std::unique_lock obtainHostPtrSurfaceCreationLock() override { ++hostPtrSurfaceCreationMutexLockCount; return CommandStreamReceiver::obtainHostPtrSurfaceCreationLock(); } bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override { bool status = CommandStreamReceiver::createAllocationForHostSurface(surface, requiresL3Flush); if (status) surface.getAllocation()->hostPtrTaskCountAssignment--; return status; } void postInitFlagsSetup() override {} bool isOwnershipMutexLocked() { bool isLocked = !this->ownershipMutex.try_lock(); if (!isLocked) { this->ownershipMutex.unlock(); } return isLocked; } SubmissionStatus initializeDeviceWithFirstSubmission(Device &device) override { return SubmissionStatus::success; } QueueThrottle getLastDirectSubmissionThrottle() override { return getLastDirectSubmissionThrottleReturnValue; } bool getAcLineConnected(bool updateStatus) const override { return getAcLineConnectedReturnValue; } static constexpr size_t tagSize = 256; static volatile TagAddressType mockTagAddress[tagSize]; std::vector instructionHeapReserveredData; int *flushBatchedSubmissionsCallCounter = nullptr; uint32_t waitForCompletionWithTimeoutCalled = 0; uint32_t fillReusableAllocationsListCalled = 0; uint32_t writeMemoryAubCalled = 0; uint32_t makeResidentCalledTimes = 0; uint32_t downloadAllocationsCalledCount = 0; uint32_t submitDependencyUpdateCalledTimes = 0; int hostPtrSurfaceCreationMutexLockCount = 0; bool multiOsContextCapable = false; bool memoryCompressionEnabled = false; bool programHardwareContextCalled = false; bool createPreemptionAllocationReturn = true; bool createPreemptionAllocationParentCall = false; bool makeResidentParentCall = false; bool programComputeBarrierCommandCalled = false; bool programStallingCommandsForBarrierCalled = false; bool blockingStopDirectSubmissionCalled = false; std::optional isGpuHangDetectedReturnValue{}; std::optional testTaskCountReadyReturnValue{}; WaitStatus waitForCompletionWithTimeoutReturnValue{WaitStatus::ready}; CommandStreamReceiverType commandStreamReceiverType = CommandStreamReceiverType::hardware; BatchBuffer latestFlushedBatchBuffer = {}; QueueThrottle getLastDirectSubmissionThrottleReturnValue = QueueThrottle::MEDIUM; bool getAcLineConnectedReturnValue = true; bool submitDependencyUpdateReturnValue = true; }; class MockCommandStreamReceiverWithFailingSubmitBatch : public MockCommandStreamReceiver { public: MockCommandStreamReceiverWithFailingSubmitBatch(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {} SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { return SubmissionStatus::failed; } }; class MockCommandStreamReceiverWithOutOfMemorySubmitBatch : public MockCommandStreamReceiver { public: MockCommandStreamReceiverWithOutOfMemorySubmitBatch(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {} SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { return SubmissionStatus::outOfMemory; } }; class MockCommandStreamReceiverWithFailingFlush : public MockCommandStreamReceiver { public: MockCommandStreamReceiverWithFailingFlush(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {} SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { return SubmissionStatus::failed; } }; template class MockCsrHw2 : public CommandStreamReceiverHw { public: using CommandStreamReceiverHw::CommandStreamReceiverHw; using CommandStreamReceiverHw::csrSizeRequestFlags; using CommandStreamReceiverHw::flushStamp; using CommandStreamReceiverHw::postInitFlagsSetup; using CommandStreamReceiverHw::programL3; using CommandStreamReceiverHw::programVFEState; using CommandStreamReceiver::activePartitions; using CommandStreamReceiver::activePartitionsConfig; using CommandStreamReceiver::clearColorAllocation; using CommandStreamReceiver::commandStream; using CommandStreamReceiver::dispatchMode; using CommandStreamReceiver::feSupportFlags; using CommandStreamReceiver::globalFenceAllocation; using CommandStreamReceiver::heapStorageRequiresRecyclingTag; using CommandStreamReceiver::immWritePostSyncWriteOffset; using CommandStreamReceiver::isPreambleSent; using CommandStreamReceiver::latestFlushedTaskCount; using CommandStreamReceiver::mediaVfeStateDirty; using CommandStreamReceiver::nTo1SubmissionModelEnabled; using CommandStreamReceiver::pageTableManagerInitialized; using CommandStreamReceiver::requiredScratchSlot0Size; using CommandStreamReceiver::sbaSupportFlags; using CommandStreamReceiver::streamProperties; using CommandStreamReceiver::tagAddress; using CommandStreamReceiver::taskCount; using CommandStreamReceiver::taskLevel; using CommandStreamReceiver::timestampPacketWriteEnabled; using CommandStreamReceiver::timeStampPostSyncWriteOffset; using CommandStreamReceiver::useGpuIdleImplicitFlush; using CommandStreamReceiver::useNewResourceImplicitFlush; MockCsrHw2(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : CommandStreamReceiverHw::CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex, deviceBitfield) { } SubmissionAggregator *peekSubmissionAggregator() { return this->submissionAggregator.get(); } void overrideSubmissionAggregator(SubmissionAggregator *newSubmissionsAggregator) { this->submissionAggregator.reset(newSubmissionsAggregator); } uint64_t peekTotalMemoryUsed() { return this->totalMemoryUsed; } bool peekMediaVfeStateDirty() const { return mediaVfeStateDirty; } SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { flushCalledCount++; if (recordedCommandBuffer) { recordedCommandBuffer->batchBuffer = batchBuffer; } copyOfAllocations = allocationsForResidency; flushStamp->setStamp(flushStamp->peekStamp() + 1); return SubmissionStatus::success; } CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart, const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { passedDispatchFlags = dispatchFlags; recordedCommandBuffer = std::unique_ptr(new CommandBuffer(device)); auto completionStamp = CommandStreamReceiverHw::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); if (storeFlushedTaskStream && commandStream.getUsed() > commandStreamStart) { storedTaskStreamSize = commandStream.getUsed() - commandStreamStart; // Overfetch to allow command parser verify if "big" command is programmed at the end of allocation auto overfetchedSize = storedTaskStreamSize + MemoryConstants::cacheLineSize; storedTaskStream.reset(new uint8_t[overfetchedSize]); memset(storedTaskStream.get(), 0, overfetchedSize); memcpy_s(storedTaskStream.get(), storedTaskStreamSize, ptrOffset(commandStream.getCpuBase(), commandStreamStart), storedTaskStreamSize); } return completionStamp; } TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { if (!skipBlitCalls) { return CommandStreamReceiverHw::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device); } return taskCount; } void programHardwareContext(LinearStream &cmdStream) override { programHardwareContextCalled = true; } bool skipBlitCalls = false; bool storeFlushedTaskStream = false; std::unique_ptr storedTaskStream; size_t storedTaskStreamSize = 0; int flushCalledCount = 0; std::unique_ptr recordedCommandBuffer = nullptr; ResidencyContainer copyOfAllocations; DispatchFlags passedDispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); bool programHardwareContextCalled = false; };