compute-runtime/shared/test/common/mocks/mock_command_stream_receiver.h

453 lines
20 KiB
C++

/*
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/submission_status.h"
#include "shared/source/command_stream/submissions_aggregator.h"
#include "shared/source/command_stream/wait_status.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/helpers/flush_stamp.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/string.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/surface.h"
#include "shared/source/utilities/tag_allocator.h"
#include "shared/test/common/helpers/dispatch_flags_helper.h"
#include "shared/test/common/test_macros/mock_method_macros.h"
#include <optional>
#include <vector>
namespace NEO {
struct CommandBuffer;
}
using namespace NEO;
class MockCommandStreamReceiver : public CommandStreamReceiver {
public:
using BaseClass = CommandStreamReceiver;
using CommandStreamReceiver::activePartitions;
using CommandStreamReceiver::baseWaitFunction;
using CommandStreamReceiver::checkForNewResources;
using CommandStreamReceiver::checkImplicitFlushForGpuIdle;
using CommandStreamReceiver::cleanupResources;
using CommandStreamReceiver::CommandStreamReceiver;
using CommandStreamReceiver::globalFenceAllocation;
using CommandStreamReceiver::gpuHangCheckPeriod;
using CommandStreamReceiver::heaplessStateInitialized;
using CommandStreamReceiver::immWritePostSyncWriteOffset;
using CommandStreamReceiver::internalAllocationStorage;
using CommandStreamReceiver::latestFlushedTaskCount;
using CommandStreamReceiver::latestSentTaskCount;
using CommandStreamReceiver::localMemoryEnabled;
using CommandStreamReceiver::newResources;
using CommandStreamReceiver::numClients;
using CommandStreamReceiver::osContext;
using CommandStreamReceiver::ownershipMutex;
using CommandStreamReceiver::preemptionAllocation;
using CommandStreamReceiver::primaryCsr;
using CommandStreamReceiver::requiresInstructionCacheFlush;
using CommandStreamReceiver::tagAddress;
using CommandStreamReceiver::tagsMultiAllocation;
using CommandStreamReceiver::taskCount;
using CommandStreamReceiver::timestampPacketAllocator;
using CommandStreamReceiver::timeStampPostSyncWriteOffset;
using CommandStreamReceiver::useGpuIdleImplicitFlush;
using CommandStreamReceiver::useNewResourceImplicitFlush;
MockCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
: CommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {
CommandStreamReceiver::tagAddress = &mockTagAddress[0];
memset(const_cast<TagAddressType *>(CommandStreamReceiver::tagAddress), 0xFFFFFFFF, tagSize * sizeof(TagAddressType));
gpuHangCheckPeriod = {};
}
WaitStatus waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) override {
waitForCompletionWithTimeoutCalled++;
return waitForCompletionWithTimeoutReturnValue;
}
void fillReusableAllocationsList() override {
fillReusableAllocationsListCalled++;
}
SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
SubmissionStatus flushTagUpdate() override { return SubmissionStatus::success; };
void updateTagFromWait() override{};
bool submitDependencyUpdate(TagNodeBase *tag) override {
submitDependencyUpdateCalledTimes++;
return submitDependencyUpdateReturnValue;
}
bool isUpdateTagFromWaitEnabled() override { return false; };
void writeMemoryAub(aub_stream::AllocationParams &allocationParams) override {
writeMemoryAubCalled++;
}
void initializeEngine() override {
initializeEngineCalled++;
}
bool isMultiOsContextCapable() const override { return multiOsContextCapable; }
bool isGpuHangDetected() const override {
if (isGpuHangDetectedReturnValue.has_value()) {
return *isGpuHangDetectedReturnValue;
} else {
return CommandStreamReceiver::isGpuHangDetected();
}
}
bool testTaskCountReady(volatile TagAddressType *pollAddress, TaskCountType taskCountToWait) override {
if (testTaskCountReadyReturnValue.has_value()) {
return *testTaskCountReadyReturnValue;
} else {
return CommandStreamReceiver::testTaskCountReady(pollAddress, taskCountToWait);
}
}
MemoryCompressionState getMemoryCompressionState(bool auxTranslationRequired) const override {
return MemoryCompressionState::notApplicable;
};
TagAllocatorBase *getTimestampPacketAllocator() override { return nullptr; }
std::unique_ptr<TagAllocatorBase> createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer &rootDeviceIndices) override { return std::unique_ptr<TagAllocatorBase>(nullptr); }
CompletionStamp flushTask(
LinearStream &commandStream,
size_t commandStreamStart,
const IndirectHeap *dsh,
const IndirectHeap *ioh,
const IndirectHeap *ssh,
TaskCountType taskLevel,
DispatchFlags &dispatchFlags,
Device &device) override;
CompletionStamp flushTaskStateless(
LinearStream &commandStream,
size_t commandStreamStart,
const IndirectHeap *dsh,
const IndirectHeap *ioh,
const IndirectHeap *ssh,
TaskCountType taskLevel,
DispatchFlags &dispatchFlags,
Device &device) override;
CompletionStamp flushImmediateTask(
LinearStream &immediateCommandStream,
size_t immediateCommandStreamStart,
ImmediateDispatchFlags &dispatchFlags,
Device &device) override;
CompletionStamp flushImmediateTaskStateless(
LinearStream &immediateCommandStream,
size_t immediateCommandStreamStart,
ImmediateDispatchFlags &dispatchFlags,
Device &device) override;
CompletionStamp flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) override;
SubmissionStatus sendRenderStateCacheFlush() override {
return SubmissionStatus::success;
}
bool flushBatchedSubmissions() override {
if (flushBatchedSubmissionsCallCounter) {
(*flushBatchedSubmissionsCallCounter)++;
}
return true;
}
WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, QueueThrottle throttle) override {
return WaitStatus::ready;
}
WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) {
return WaitStatus::ready;
}
TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, Device &device) override { return taskCount; };
CommandStreamReceiverType getType() const override {
return commandStreamReceiverType;
}
void downloadAllocations(bool blockingWait, TaskCountType taskCount) override {
downloadAllocationsCalledCount++;
}
void programHardwareContext(LinearStream &cmdStream) override {
programHardwareContextCalled = true;
}
size_t getCmdsSizeForHardwareContext() const override {
return 0;
}
void programComputeBarrierCommand(LinearStream &cmdStream) override {
programComputeBarrierCommandCalled = true;
}
size_t getCmdsSizeForComputeBarrierCommand() const override {
return 0;
}
void programStallingCommandsForBarrier(LinearStream &cmdStream, TimestampPacketContainer *barrierTimestampPacketNodes, const bool isDcFlushRequired) override {
programStallingCommandsForBarrierCalled = true;
}
void stopDirectSubmission(bool blocking, bool needsLock) override {
this->blockingStopDirectSubmissionCalled = blocking;
stopDirectSubmissionCalledTimes++;
}
bool createPreemptionAllocation() override {
if (createPreemptionAllocationParentCall) {
return CommandStreamReceiver::createPreemptionAllocation();
}
return createPreemptionAllocationReturn;
}
GraphicsAllocation *getClearColorAllocation() override { return nullptr; }
void makeResident(GraphicsAllocation &gfxAllocation) override {
makeResidentCalledTimes++;
if (makeResidentParentCall) {
return CommandStreamReceiver::makeResident(gfxAllocation);
}
}
std::unique_lock<CommandStreamReceiver::MutexType> obtainHostPtrSurfaceCreationLock() override {
++hostPtrSurfaceCreationMutexLockCount;
return CommandStreamReceiver::obtainHostPtrSurfaceCreationLock();
}
bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override {
bool status = CommandStreamReceiver::createAllocationForHostSurface(surface, requiresL3Flush);
if (status)
surface.getAllocation()->hostPtrTaskCountAssignment--;
return status;
}
void postInitFlagsSetup() override {}
bool isOwnershipMutexLocked() {
bool isLocked = !this->ownershipMutex.try_lock();
if (!isLocked) {
this->ownershipMutex.unlock();
}
return isLocked;
}
SubmissionStatus initializeDeviceWithFirstSubmission(Device &device) override { return SubmissionStatus::success; }
QueueThrottle getLastDirectSubmissionThrottle() override {
return getLastDirectSubmissionThrottleReturnValue;
}
bool getAcLineConnected(bool updateStatus) const override {
return getAcLineConnectedReturnValue;
}
void unblockPagingFenceSemaphore(uint64_t pagingFenceValue) override {
this->pagingFenceValueToUnblock = pagingFenceValue;
}
static constexpr size_t tagSize = 256;
static volatile TagAddressType mockTagAddress[tagSize];
std::vector<char> instructionHeapReserveredData;
int *flushBatchedSubmissionsCallCounter = nullptr;
uint32_t waitForCompletionWithTimeoutCalled = 0;
uint32_t fillReusableAllocationsListCalled = 0;
uint32_t writeMemoryAubCalled = 0;
uint32_t initializeEngineCalled = 0;
uint32_t makeResidentCalledTimes = 0;
uint32_t downloadAllocationsCalledCount = 0;
uint32_t submitDependencyUpdateCalledTimes = 0;
uint32_t stopDirectSubmissionCalledTimes = 0;
int hostPtrSurfaceCreationMutexLockCount = 0;
bool multiOsContextCapable = false;
bool memoryCompressionEnabled = false;
bool programHardwareContextCalled = false;
bool createPreemptionAllocationReturn = true;
bool createPreemptionAllocationParentCall = false;
bool makeResidentParentCall = false;
bool programComputeBarrierCommandCalled = false;
bool programStallingCommandsForBarrierCalled = false;
bool blockingStopDirectSubmissionCalled = false;
std::optional<bool> isGpuHangDetectedReturnValue{};
std::optional<bool> testTaskCountReadyReturnValue{};
WaitStatus waitForCompletionWithTimeoutReturnValue{WaitStatus::ready};
CommandStreamReceiverType commandStreamReceiverType = CommandStreamReceiverType::hardware;
BatchBuffer latestFlushedBatchBuffer = {};
QueueThrottle getLastDirectSubmissionThrottleReturnValue = QueueThrottle::MEDIUM;
bool getAcLineConnectedReturnValue = true;
bool submitDependencyUpdateReturnValue = true;
std::atomic<uint64_t> pagingFenceValueToUnblock{0u};
};
class MockCommandStreamReceiverWithFailingSubmitBatch : public MockCommandStreamReceiver {
public:
MockCommandStreamReceiverWithFailingSubmitBatch(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
: MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {}
SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override {
return SubmissionStatus::failed;
}
};
class MockCommandStreamReceiverWithOutOfMemorySubmitBatch : public MockCommandStreamReceiver {
public:
MockCommandStreamReceiverWithOutOfMemorySubmitBatch(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
: MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {}
SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override {
return SubmissionStatus::outOfMemory;
}
};
class MockCommandStreamReceiverWithFailingFlush : public MockCommandStreamReceiver {
public:
MockCommandStreamReceiverWithFailingFlush(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
: MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {}
SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override {
return SubmissionStatus::failed;
}
};
template <bool isRelaxedOrderingEnabled>
class MockCommandStreamReceiverWithDirectSubmissionRelaxedOrdering : public MockCommandStreamReceiver {
public:
using MockCommandStreamReceiver::MockCommandStreamReceiver;
bool directSubmissionRelaxedOrderingEnabled() const override {
return isRelaxedOrderingEnabled;
}
};
template <typename GfxFamily>
class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
public:
using CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw;
using CommandStreamReceiverHw<GfxFamily>::csrSizeRequestFlags;
using CommandStreamReceiverHw<GfxFamily>::flushStamp;
using CommandStreamReceiverHw<GfxFamily>::postInitFlagsSetup;
using CommandStreamReceiverHw<GfxFamily>::programL3;
using CommandStreamReceiverHw<GfxFamily>::programVFEState;
using CommandStreamReceiverHw<GfxFamily>::directSubmission;
using CommandStreamReceiver::activePartitions;
using CommandStreamReceiver::activePartitionsConfig;
using CommandStreamReceiver::clearColorAllocation;
using CommandStreamReceiver::commandStream;
using CommandStreamReceiver::dispatchMode;
using CommandStreamReceiver::feSupportFlags;
using CommandStreamReceiver::globalFenceAllocation;
using CommandStreamReceiver::heaplessStateInitialized;
using CommandStreamReceiver::heapStorageRequiresRecyclingTag;
using CommandStreamReceiver::immWritePostSyncWriteOffset;
using CommandStreamReceiver::isPreambleSent;
using CommandStreamReceiver::latestFlushedTaskCount;
using CommandStreamReceiver::mediaVfeStateDirty;
using CommandStreamReceiver::nTo1SubmissionModelEnabled;
using CommandStreamReceiver::pageTableManagerInitialized;
using CommandStreamReceiver::requiredScratchSlot0Size;
using CommandStreamReceiver::sbaSupportFlags;
using CommandStreamReceiver::streamProperties;
using CommandStreamReceiver::tagAddress;
using CommandStreamReceiver::taskCount;
using CommandStreamReceiver::taskLevel;
using CommandStreamReceiver::timestampPacketWriteEnabled;
using CommandStreamReceiver::timeStampPostSyncWriteOffset;
using CommandStreamReceiver::useGpuIdleImplicitFlush;
using CommandStreamReceiver::useNewResourceImplicitFlush;
MockCsrHw2(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
: CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex, deviceBitfield) {
}
SubmissionAggregator *peekSubmissionAggregator() {
return this->submissionAggregator.get();
}
void overrideSubmissionAggregator(SubmissionAggregator *newSubmissionsAggregator) {
this->submissionAggregator.reset(newSubmissionsAggregator);
}
uint64_t peekTotalMemoryUsed() {
return this->totalMemoryUsed;
}
bool peekMediaVfeStateDirty() const { return mediaVfeStateDirty; }
SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override {
flushCalledCount++;
if (recordedCommandBuffer) {
recordedCommandBuffer->batchBuffer = batchBuffer;
}
copyOfAllocations = allocationsForResidency;
flushStamp->setStamp(flushStamp->peekStamp() + 1);
return SubmissionStatus::success;
}
CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
const IndirectHeap *dsh, const IndirectHeap *ioh,
const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
passedDispatchFlags = dispatchFlags;
recordedCommandBuffer = std::unique_ptr<CommandBuffer>(new CommandBuffer(device));
auto completionStamp = CommandStreamReceiverHw<GfxFamily>::flushTask(commandStream, commandStreamStart,
dsh, ioh, ssh, taskLevel, dispatchFlags, device);
storeCommandStream(commandStream, commandStreamStart);
return completionStamp;
}
CompletionStamp flushTaskStateless(LinearStream &commandStream, size_t commandStreamStart,
const IndirectHeap *dsh, const IndirectHeap *ioh,
const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
passedDispatchFlags = dispatchFlags;
recordedCommandBuffer = std::unique_ptr<CommandBuffer>(new CommandBuffer(device));
auto completionStamp = CommandStreamReceiverHw<GfxFamily>::flushTaskStateless(commandStream, commandStreamStart,
dsh, ioh, ssh, taskLevel, dispatchFlags, device);
storeCommandStream(commandStream, commandStreamStart);
return completionStamp;
}
TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, Device &device) override {
if (!skipBlitCalls) {
return CommandStreamReceiverHw<GfxFamily>::flushBcsTask(blitPropertiesContainer, blocking, device);
}
return taskCount;
}
void programHardwareContext(LinearStream &cmdStream) override {
programHardwareContextCalled = true;
}
private:
void storeCommandStream(LinearStream &commandStream, size_t commandStreamStart) {
if (storeFlushedTaskStream && commandStream.getUsed() > commandStreamStart) {
storedTaskStreamSize = commandStream.getUsed() - commandStreamStart;
// Overfetch to allow command parser verify if "big" command is programmed at the end of allocation
auto overfetchedSize = storedTaskStreamSize + MemoryConstants::cacheLineSize;
storedTaskStream.reset(new uint8_t[overfetchedSize]);
memset(storedTaskStream.get(), 0, overfetchedSize);
memcpy_s(storedTaskStream.get(), storedTaskStreamSize,
ptrOffset(commandStream.getCpuBase(), commandStreamStart), storedTaskStreamSize);
}
}
public:
bool skipBlitCalls = false;
bool storeFlushedTaskStream = false;
std::unique_ptr<uint8_t[]> storedTaskStream;
size_t storedTaskStreamSize = 0;
uint32_t flushCalledCount = 0;
std::unique_ptr<CommandBuffer> recordedCommandBuffer = nullptr;
ResidencyContainer copyOfAllocations;
DispatchFlags passedDispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
bool programHardwareContextCalled = false;
};