Add helper functions for memory compression to CSR

Signed-off-by: Slawomir Milczarek <slawomir.milczarek@intel.com>
This commit is contained in:
Slawomir Milczarek
2020-12-17 00:36:45 +00:00
committed by Compute-Runtime-Automation
parent fb75390954
commit 6986d5de0b
23 changed files with 180 additions and 46 deletions

View File

@@ -189,6 +189,8 @@ class CommandStreamReceiver {
virtual bool isMultiOsContextCapable() const = 0;
virtual MemoryCompressionState getMemoryCompressionState(bool auxTranslationRequired) const = 0;
// Stores the supplied value as this receiver's latest sent task count.
void setLatestSentTaskCount(uint32_t latestSentTaskCount) {
// `this->` is needed because the parameter shadows the member of the same name.
this->latestSentTaskCount = latestSentTaskCount;
}
@@ -293,6 +295,7 @@ class CommandStreamReceiver {
uint32_t requiredPrivateScratchSize = 0;
uint32_t lastAdditionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
KernelExecutionType lastKernelExecutionType = KernelExecutionType::Default;
MemoryCompressionState lastMemoryCompressionState = MemoryCompressionState::NotApplicable;
const uint32_t rootDeviceIndex;
const DeviceBitfield deviceBitfield;

View File

@@ -95,6 +95,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
bool isMultiOsContextCapable() const override;
MemoryCompressionState getMemoryCompressionState(bool auxTranslationRequired) const override;
// Returns true when `directSubmission` currently holds a non-null pointer, false otherwise.
bool isDirectSubmissionEnabled() const override {
return directSubmission.get() != nullptr;
}

View File

@@ -368,6 +368,13 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
bool sourceLevelDebuggerActive = device.getSourceLevelDebugger() != nullptr ? true : false;
if (dispatchFlags.memoryCompressionState != MemoryCompressionState::NotApplicable) {
if (lastMemoryCompressionState != dispatchFlags.memoryCompressionState) {
isStateBaseAddressDirty = true;
lastMemoryCompressionState = dispatchFlags.memoryCompressionState;
}
}
//Reprogram state base address if required
if (isStateBaseAddressDirty || sourceLevelDebuggerActive) {
addPipeControlBeforeStateBaseAddress(commandStreamCSR);
@@ -399,7 +406,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
instructionHeapBaseAddress,
true,
device.getGmmHelper(),
isMultiOsContextCapable());
isMultiOsContextCapable(),
dispatchFlags.memoryCompressionState);
*pCmd = cmd;
if (sshDirty) {
@@ -1068,6 +1076,11 @@ inline bool CommandStreamReceiverHw<GfxFamily>::isComputeModeNeeded() const {
return false;
}
// Definition provided by this template: always returns MemoryCompressionState::NotApplicable.
// The `auxTranslationRequired` argument is not consulted here.
// NOTE(review): family-specific specializations may exist elsewhere and behave differently - not visible in this file section.
template <typename GfxFamily>
inline MemoryCompressionState CommandStreamReceiverHw<GfxFamily>::getMemoryCompressionState(bool auxTranslationRequired) const {
return MemoryCompressionState::NotApplicable;
}
template <typename GfxFamily>
inline bool CommandStreamReceiverHw<GfxFamily>::isPipelineSelectAlreadyProgrammed() const {
auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily);

View File

@@ -7,6 +7,7 @@
#pragma once
#include "shared/source/command_stream/csr_deps.h"
#include "shared/source/command_stream/memory_compression_state.h"
#include "shared/source/command_stream/queue_throttle.h"
#include "shared/source/command_stream/thread_arbitration_policy.h"
#include "shared/source/helpers/constants.h"
@@ -52,33 +53,35 @@ struct DispatchFlags {
DispatchFlags() = delete;
DispatchFlags(CsrDependencies csrDependencies, TimestampPacketContainer *barrierTimestampPacketNodes, PipelineSelectArgs pipelineSelectArgs,
FlushStampTrackingObj *flushStampReference, QueueThrottle throttle, PreemptionMode preemptionMode, uint32_t numGrfRequired,
uint32_t l3CacheSettings, uint32_t threadArbitrationPolicy, uint32_t additionalKernelExecInfo, KernelExecutionType kernelExecutionType, uint64_t sliceCount, bool blocking, bool dcFlush,
bool useSLM, bool guardCommandBufferWithPipeControl, bool gsba32BitRequired,
bool requiresCoherency, bool lowPriority, bool implicitFlush,
bool outOfOrderExecutionAllowed, bool epilogueRequired, bool usePerDSSbackedBuffer, bool useSingleSubdevice) : csrDependencies(csrDependencies),
barrierTimestampPacketNodes(barrierTimestampPacketNodes),
pipelineSelectArgs(pipelineSelectArgs),
flushStampReference(flushStampReference),
throttle(throttle),
preemptionMode(preemptionMode),
numGrfRequired(numGrfRequired),
l3CacheSettings(l3CacheSettings),
threadArbitrationPolicy(threadArbitrationPolicy),
additionalKernelExecInfo(additionalKernelExecInfo),
kernelExecutionType(kernelExecutionType),
sliceCount(sliceCount),
blocking(blocking),
dcFlush(dcFlush),
useSLM(useSLM),
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControl),
gsba32BitRequired(gsba32BitRequired),
requiresCoherency(requiresCoherency),
lowPriority(lowPriority),
implicitFlush(implicitFlush),
outOfOrderExecutionAllowed(outOfOrderExecutionAllowed),
epilogueRequired(epilogueRequired),
usePerDssBackedBuffer(usePerDSSbackedBuffer),
useSingleSubdevice(useSingleSubdevice){};
uint32_t l3CacheSettings, uint32_t threadArbitrationPolicy, uint32_t additionalKernelExecInfo,
KernelExecutionType kernelExecutionType, MemoryCompressionState memoryCompressionState,
uint64_t sliceCount, bool blocking, bool dcFlush, bool useSLM, bool guardCommandBufferWithPipeControl, bool gsba32BitRequired,
bool requiresCoherency, bool lowPriority, bool implicitFlush, bool outOfOrderExecutionAllowed, bool epilogueRequired,
bool usePerDSSbackedBuffer, bool useSingleSubdevice) : csrDependencies(csrDependencies),
barrierTimestampPacketNodes(barrierTimestampPacketNodes),
pipelineSelectArgs(pipelineSelectArgs),
flushStampReference(flushStampReference),
throttle(throttle),
preemptionMode(preemptionMode),
numGrfRequired(numGrfRequired),
l3CacheSettings(l3CacheSettings),
threadArbitrationPolicy(threadArbitrationPolicy),
additionalKernelExecInfo(additionalKernelExecInfo),
kernelExecutionType(kernelExecutionType),
memoryCompressionState(memoryCompressionState),
sliceCount(sliceCount),
blocking(blocking),
dcFlush(dcFlush),
useSLM(useSLM),
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControl),
gsba32BitRequired(gsba32BitRequired),
requiresCoherency(requiresCoherency),
lowPriority(lowPriority),
implicitFlush(implicitFlush),
outOfOrderExecutionAllowed(outOfOrderExecutionAllowed),
epilogueRequired(epilogueRequired),
usePerDssBackedBuffer(usePerDSSbackedBuffer),
useSingleSubdevice(useSingleSubdevice){};
CsrDependencies csrDependencies;
TimestampPacketContainer *barrierTimestampPacketNodes = nullptr;
@@ -91,6 +94,7 @@ struct DispatchFlags {
uint32_t threadArbitrationPolicy = ThreadArbitrationPolicy::NotPresent;
uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::NotApplicable;
KernelExecutionType kernelExecutionType = KernelExecutionType::NotApplicable;
MemoryCompressionState memoryCompressionState = MemoryCompressionState::NotApplicable;
uint64_t sliceCount = QueueSliceCount::defaultSliceCount;
uint64_t engineHints = 0;
bool blocking = false;

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
namespace NEO {

// Three-valued memory compression setting.
// The numeric values are assigned explicitly so they stay stable if enumerators are reordered.
enum class MemoryCompressionState {
    Disabled = 0,     // numeric value 0
    Enabled = 1,      // numeric value 1
    NotApplicable = 2 // numeric value 2; presumably "no compression state requested" - confirm against users of this enum
};

} // namespace NEO