Files
compute-runtime/shared/test/common/mocks/mock_command_stream_receiver.h
Dominik Dabek 2b964254d6 performance: debug key for adjust ULLS on battery
ULLS controller timeout settings will be adjusted based on ac line
status and lowest queue throttle from submissions.

Lowest queue throttle is reset when controller stops ULLS.

Related-To: NEO-10800

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
2024-03-22 14:24:00 +01:00

390 lines
17 KiB
C++

/*
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/submission_status.h"
#include "shared/source/command_stream/submissions_aggregator.h"
#include "shared/source/command_stream/wait_status.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/helpers/flush_stamp.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/string.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/surface.h"
#include "shared/source/utilities/tag_allocator.h"
#include "shared/test/common/helpers/dispatch_flags_helper.h"
#include <optional>
#include <vector>
namespace NEO {
struct CommandBuffer;
}
using namespace NEO;
class MockCommandStreamReceiver : public CommandStreamReceiver {
public:
using CommandStreamReceiver::activePartitions;
using CommandStreamReceiver::baseWaitFunction;
using CommandStreamReceiver::checkForNewResources;
using CommandStreamReceiver::checkImplicitFlushForGpuIdle;
using CommandStreamReceiver::cleanupResources;
using CommandStreamReceiver::CommandStreamReceiver;
using CommandStreamReceiver::globalFenceAllocation;
using CommandStreamReceiver::gpuHangCheckPeriod;
using CommandStreamReceiver::immWritePostSyncWriteOffset;
using CommandStreamReceiver::internalAllocationStorage;
using CommandStreamReceiver::latestFlushedTaskCount;
using CommandStreamReceiver::latestSentTaskCount;
using CommandStreamReceiver::localMemoryEnabled;
using CommandStreamReceiver::newResources;
using CommandStreamReceiver::numClients;
using CommandStreamReceiver::osContext;
using CommandStreamReceiver::ownershipMutex;
using CommandStreamReceiver::preemptionAllocation;
using CommandStreamReceiver::requiresInstructionCacheFlush;
using CommandStreamReceiver::tagAddress;
using CommandStreamReceiver::tagsMultiAllocation;
using CommandStreamReceiver::taskCount;
using CommandStreamReceiver::timestampPacketAllocator;
using CommandStreamReceiver::timeStampPostSyncWriteOffset;
using CommandStreamReceiver::useGpuIdleImplicitFlush;
using CommandStreamReceiver::useNewResourceImplicitFlush;
MockCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
: CommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {
CommandStreamReceiver::tagAddress = &mockTagAddress[0];
memset(const_cast<TagAddressType *>(CommandStreamReceiver::tagAddress), 0xFFFFFFFF, tagSize * sizeof(TagAddressType));
gpuHangCheckPeriod = {};
}
WaitStatus waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) override {
waitForCompletionWithTimeoutCalled++;
return waitForCompletionWithTimeoutReturnValue;
}
void fillReusableAllocationsList() override {
fillReusableAllocationsListCalled++;
}
SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
SubmissionStatus flushTagUpdate() override { return SubmissionStatus::success; };
void updateTagFromWait() override{};
bool isUpdateTagFromWaitEnabled() override { return false; };
void writeMemoryAub(aub_stream::AllocationParams &allocationParams) override {
writeMemoryAubCalled++;
}
bool isMultiOsContextCapable() const override { return multiOsContextCapable; }
bool isGpuHangDetected() const override {
if (isGpuHangDetectedReturnValue.has_value()) {
return *isGpuHangDetectedReturnValue;
} else {
return CommandStreamReceiver::isGpuHangDetected();
}
}
bool testTaskCountReady(volatile TagAddressType *pollAddress, TaskCountType taskCountToWait) override {
if (testTaskCountReadyReturnValue.has_value()) {
return *testTaskCountReadyReturnValue;
} else {
return CommandStreamReceiver::testTaskCountReady(pollAddress, taskCountToWait);
}
}
MemoryCompressionState getMemoryCompressionState(bool auxTranslationRequired) const override {
return MemoryCompressionState::notApplicable;
};
TagAllocatorBase *getTimestampPacketAllocator() override { return nullptr; }
std::unique_ptr<TagAllocatorBase> createMultiRootDeviceTimestampPacketAllocator(const RootDeviceIndicesContainer rootDeviceIndices) override { return std::unique_ptr<TagAllocatorBase>(nullptr); }
CompletionStamp flushTask(
LinearStream &commandStream,
size_t commandStreamStart,
const IndirectHeap *dsh,
const IndirectHeap *ioh,
const IndirectHeap *ssh,
TaskCountType taskLevel,
DispatchFlags &dispatchFlags,
Device &device) override;
CompletionStamp flushTaskStateless(
LinearStream &commandStream,
size_t commandStreamStart,
const IndirectHeap *dsh,
const IndirectHeap *ioh,
const IndirectHeap *ssh,
TaskCountType taskLevel,
DispatchFlags &dispatchFlags,
Device &device) override;
CompletionStamp flushImmediateTask(
LinearStream &immediateCommandStream,
size_t immediateCommandStreamStart,
ImmediateDispatchFlags &dispatchFlags,
Device &device) override;
CompletionStamp flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) override;
SubmissionStatus sendRenderStateCacheFlush() override {
return SubmissionStatus::success;
}
bool flushBatchedSubmissions() override {
if (flushBatchedSubmissionsCallCounter) {
(*flushBatchedSubmissionsCallCounter)++;
}
return true;
}
WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, QueueThrottle throttle) override {
return WaitStatus::ready;
}
WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) {
return WaitStatus::ready;
}
TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
CommandStreamReceiverType getType() const override {
return commandStreamReceiverType;
}
void downloadAllocations() override {
downloadAllocationsCalledCount++;
}
void programHardwareContext(LinearStream &cmdStream) override {
programHardwareContextCalled = true;
}
size_t getCmdsSizeForHardwareContext() const override {
return 0;
}
void programComputeBarrierCommand(LinearStream &cmdStream) override {
programComputeBarrierCommandCalled = true;
}
size_t getCmdsSizeForComputeBarrierCommand() const override {
return 0;
}
void programStallingCommandsForBarrier(LinearStream &cmdStream, TimestampPacketContainer *barrierTimestampPacketNodes, const bool isDcFlushRequired) override {
programStallingCommandsForBarrierCalled = true;
}
bool createPreemptionAllocation() override {
if (createPreemptionAllocationParentCall) {
return CommandStreamReceiver::createPreemptionAllocation();
}
return createPreemptionAllocationReturn;
}
GraphicsAllocation *getClearColorAllocation() override { return nullptr; }
void makeResident(GraphicsAllocation &gfxAllocation) override {
makeResidentCalledTimes++;
if (makeResidentParentCall) {
return CommandStreamReceiver::makeResident(gfxAllocation);
}
}
std::unique_lock<CommandStreamReceiver::MutexType> obtainHostPtrSurfaceCreationLock() override {
++hostPtrSurfaceCreationMutexLockCount;
return CommandStreamReceiver::obtainHostPtrSurfaceCreationLock();
}
bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override {
bool status = CommandStreamReceiver::createAllocationForHostSurface(surface, requiresL3Flush);
if (status)
surface.getAllocation()->hostPtrTaskCountAssignment--;
return status;
}
void postInitFlagsSetup() override {}
bool isOwnershipMutexLocked() {
bool isLocked = !this->ownershipMutex.try_lock();
if (!isLocked) {
this->ownershipMutex.unlock();
}
return isLocked;
}
SubmissionStatus initializeDeviceWithFirstSubmission(Device &device) override { return SubmissionStatus::success; }
QueueThrottle getLastDirectSubmissionThrottle() override {
return getLastDirectSubmissionThrottleReturnValue;
}
bool getAcLineConnected(bool updateStatus) const override {
return getAcLineConnectedReturnValue;
}
static constexpr size_t tagSize = 256;
static volatile TagAddressType mockTagAddress[tagSize];
std::vector<char> instructionHeapReserveredData;
int *flushBatchedSubmissionsCallCounter = nullptr;
uint32_t waitForCompletionWithTimeoutCalled = 0;
uint32_t fillReusableAllocationsListCalled = 0;
uint32_t writeMemoryAubCalled = 0;
uint32_t makeResidentCalledTimes = 0;
uint32_t downloadAllocationsCalledCount = 0;
int hostPtrSurfaceCreationMutexLockCount = 0;
bool multiOsContextCapable = false;
bool memoryCompressionEnabled = false;
bool programHardwareContextCalled = false;
bool createPreemptionAllocationReturn = true;
bool createPreemptionAllocationParentCall = false;
bool makeResidentParentCall = false;
bool programComputeBarrierCommandCalled = false;
bool programStallingCommandsForBarrierCalled = false;
std::optional<bool> isGpuHangDetectedReturnValue{};
std::optional<bool> testTaskCountReadyReturnValue{};
WaitStatus waitForCompletionWithTimeoutReturnValue{WaitStatus::ready};
CommandStreamReceiverType commandStreamReceiverType = CommandStreamReceiverType::CSR_HW;
BatchBuffer latestFlushedBatchBuffer = {};
QueueThrottle getLastDirectSubmissionThrottleReturnValue = QueueThrottle::MEDIUM;
bool getAcLineConnectedReturnValue = true;
};
class MockCommandStreamReceiverWithFailingSubmitBatch : public MockCommandStreamReceiver {
public:
MockCommandStreamReceiverWithFailingSubmitBatch(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
: MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {}
SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override {
return SubmissionStatus::failed;
}
};
class MockCommandStreamReceiverWithOutOfMemorySubmitBatch : public MockCommandStreamReceiver {
public:
MockCommandStreamReceiverWithOutOfMemorySubmitBatch(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
: MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {}
SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override {
return SubmissionStatus::outOfMemory;
}
};
class MockCommandStreamReceiverWithFailingFlush : public MockCommandStreamReceiver {
public:
MockCommandStreamReceiverWithFailingFlush(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
: MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {}
SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override {
return SubmissionStatus::failed;
}
};
template <typename GfxFamily>
class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
public:
using CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw;
using CommandStreamReceiverHw<GfxFamily>::csrSizeRequestFlags;
using CommandStreamReceiverHw<GfxFamily>::flushStamp;
using CommandStreamReceiverHw<GfxFamily>::postInitFlagsSetup;
using CommandStreamReceiverHw<GfxFamily>::programL3;
using CommandStreamReceiverHw<GfxFamily>::programVFEState;
using CommandStreamReceiver::activePartitions;
using CommandStreamReceiver::activePartitionsConfig;
using CommandStreamReceiver::clearColorAllocation;
using CommandStreamReceiver::commandStream;
using CommandStreamReceiver::dispatchMode;
using CommandStreamReceiver::feSupportFlags;
using CommandStreamReceiver::globalFenceAllocation;
using CommandStreamReceiver::heapStorageRequiresRecyclingTag;
using CommandStreamReceiver::immWritePostSyncWriteOffset;
using CommandStreamReceiver::isPreambleSent;
using CommandStreamReceiver::latestFlushedTaskCount;
using CommandStreamReceiver::mediaVfeStateDirty;
using CommandStreamReceiver::nTo1SubmissionModelEnabled;
using CommandStreamReceiver::pageTableManagerInitialized;
using CommandStreamReceiver::requiredScratchSlot0Size;
using CommandStreamReceiver::sbaSupportFlags;
using CommandStreamReceiver::streamProperties;
using CommandStreamReceiver::tagAddress;
using CommandStreamReceiver::taskCount;
using CommandStreamReceiver::taskLevel;
using CommandStreamReceiver::timestampPacketWriteEnabled;
using CommandStreamReceiver::timeStampPostSyncWriteOffset;
using CommandStreamReceiver::useGpuIdleImplicitFlush;
using CommandStreamReceiver::useNewResourceImplicitFlush;
MockCsrHw2(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
: CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex, deviceBitfield) {
}
SubmissionAggregator *peekSubmissionAggregator() {
return this->submissionAggregator.get();
}
void overrideSubmissionAggregator(SubmissionAggregator *newSubmissionsAggregator) {
this->submissionAggregator.reset(newSubmissionsAggregator);
}
uint64_t peekTotalMemoryUsed() {
return this->totalMemoryUsed;
}
bool peekMediaVfeStateDirty() const { return mediaVfeStateDirty; }
SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override {
flushCalledCount++;
if (recordedCommandBuffer) {
recordedCommandBuffer->batchBuffer = batchBuffer;
}
copyOfAllocations = allocationsForResidency;
flushStamp->setStamp(flushStamp->peekStamp() + 1);
return SubmissionStatus::success;
}
CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
const IndirectHeap *dsh, const IndirectHeap *ioh,
const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
passedDispatchFlags = dispatchFlags;
recordedCommandBuffer = std::unique_ptr<CommandBuffer>(new CommandBuffer(device));
auto completionStamp = CommandStreamReceiverHw<GfxFamily>::flushTask(commandStream, commandStreamStart,
dsh, ioh, ssh, taskLevel, dispatchFlags, device);
if (storeFlushedTaskStream && commandStream.getUsed() > commandStreamStart) {
storedTaskStreamSize = commandStream.getUsed() - commandStreamStart;
// Overfetch to allow command parser verify if "big" command is programmed at the end of allocation
auto overfetchedSize = storedTaskStreamSize + MemoryConstants::cacheLineSize;
storedTaskStream.reset(new uint8_t[overfetchedSize]);
memset(storedTaskStream.get(), 0, overfetchedSize);
memcpy_s(storedTaskStream.get(), storedTaskStreamSize,
ptrOffset(commandStream.getCpuBase(), commandStreamStart), storedTaskStreamSize);
}
return completionStamp;
}
TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override {
if (!skipBlitCalls) {
return CommandStreamReceiverHw<GfxFamily>::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device);
}
return taskCount;
}
void programHardwareContext(LinearStream &cmdStream) override {
programHardwareContextCalled = true;
}
bool skipBlitCalls = false;
bool storeFlushedTaskStream = false;
std::unique_ptr<uint8_t[]> storedTaskStream;
size_t storedTaskStreamSize = 0;
int flushCalledCount = 0;
std::unique_ptr<CommandBuffer> recordedCommandBuffer = nullptr;
ResidencyContainer copyOfAllocations;
DispatchFlags passedDispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
bool programHardwareContextCalled = false;
};