/*
 * Copyright (C) 2018-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
|
|
#include "shared/source/aub_mem_dump/aub_mem_dump.h"
|
|
#include "shared/source/built_ins/sip_kernel_type.h"
|
|
#include "shared/source/commands/bxml_generator_glue.h"
|
|
#include "shared/source/helpers/aux_translation.h"
|
|
#include "shared/source/helpers/definitions/engine_group_types.h"
|
|
#include "shared/source/helpers/engine_node_helper.h"
|
|
#include "shared/source/helpers/options.h"
|
|
|
|
#include "igfxfmid.h"
|
|
#include "sku_info.h"
|
|
#include "third_party/aub_stream/headers/aubstream.h"
|
|
|
|
#include <cstdint>
|
|
#include <memory>
|
|
#include <string>
|
|
|
|
namespace NEO {
|
|
// Forward declarations of types that this header only uses by reference,
// by pointer, or as a template argument of std::unique_ptr.
class Gmm;
class GmmHelper;
class GraphicsAllocation;
class LinearStream;
class MemoryManager;
class TagAllocatorBase;

struct AllocationData;
struct AllocationProperties;
struct EncodeSurfaceStateArgs;
struct PipeControlArgs;
struct RootDeviceEnvironment;
|
|
|
|
class HwHelper {
|
|
public:
|
|
static HwHelper &get(GFXCORE_FAMILY gfxCore);
|
|
virtual size_t getMaxBarrierRegisterPerSlice() const = 0;
|
|
virtual size_t getPaddingForISAAllocation() const = 0;
|
|
virtual uint32_t getComputeUnitsUsedForScratch(const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
|
|
virtual uint32_t getPitchAlignmentForImage(const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
|
|
virtual uint32_t getMaxNumSamplers() const = 0;
|
|
virtual void adjustDefaultEngineType(HardwareInfo *pHwInfo) = 0;
|
|
virtual SipKernelType getSipKernelType(bool debuggingActive) const = 0;
|
|
virtual bool isLocalMemoryEnabled(const HardwareInfo &hwInfo) const = 0;
|
|
virtual bool is1MbAlignmentSupported(const HardwareInfo &hwInfo, bool isCompressionEnabled) const = 0;
|
|
virtual bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const = 0;
|
|
virtual const AubMemDump::LrcaHelper &getCsTraits(aub_stream::EngineType engineType) const = 0;
|
|
virtual bool hvAlign4Required() const = 0;
|
|
virtual bool preferSmallWorkgroupSizeForKernel(const size_t size, const HardwareInfo &hwInfo) const = 0;
|
|
virtual bool isBufferSizeSuitableForCompression(const size_t size, const HardwareInfo &hwInfo) const = 0;
|
|
virtual bool checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) = 0;
|
|
static bool compressedBuffersSupported(const HardwareInfo &hwInfo);
|
|
static bool compressedImagesSupported(const HardwareInfo &hwInfo);
|
|
static bool cacheFlushAfterWalkerSupported(const HardwareInfo &hwInfo);
|
|
static uint32_t getHighestEnabledSlice(const HardwareInfo &hwInfo);
|
|
virtual bool timestampPacketWriteSupported() const = 0;
|
|
virtual bool isTimestampWaitSupportedForQueues() const = 0;
|
|
virtual bool isUpdateTaskCountFromWaitSupported() const = 0;
|
|
virtual size_t getRenderSurfaceStateSize() const = 0;
|
|
virtual void setRenderSurfaceStateForScratchResource(const RootDeviceEnvironment &rootDeviceEnvironment,
|
|
void *surfaceStateBuffer,
|
|
size_t bufferSize,
|
|
uint64_t gpuVa,
|
|
size_t offset,
|
|
uint32_t pitch,
|
|
GraphicsAllocation *gfxAlloc,
|
|
bool isReadOnly,
|
|
uint32_t surfaceType,
|
|
bool forceNonAuxMode,
|
|
bool useL1Cache) = 0;
|
|
virtual const EngineInstancesContainer getGpgpuEngineInstances(const HardwareInfo &hwInfo) const = 0;
|
|
virtual EngineGroupType getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const = 0;
|
|
virtual const StackVec<size_t, 3> getDeviceSubGroupSizes() const = 0;
|
|
virtual const StackVec<uint32_t, 6> getThreadsPerEUConfigs() const = 0;
|
|
virtual bool getEnableLocalMemory(const HardwareInfo &hwInfo) const = 0;
|
|
virtual std::string getExtensions(const HardwareInfo &hwInfo) const = 0;
|
|
static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo);
|
|
virtual uint32_t getMetricsLibraryGenId() const = 0;
|
|
virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
|
|
virtual bool isLinearStoragePreferred(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0;
|
|
virtual uint8_t getBarriersCountFromHasBarriers(uint8_t hasBarriers) const = 0;
|
|
virtual uint32_t calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) = 0;
|
|
virtual uint32_t alignSlmSize(uint32_t slmSize) = 0;
|
|
virtual uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) = 0;
|
|
|
|
virtual bool isWaDisableRccRhwoOptimizationRequired() const = 0;
|
|
virtual bool isAdditionalFeatureFlagRequired(const FeatureTable *featureTable) const = 0;
|
|
virtual uint32_t getMinimalSIMDSize() = 0;
|
|
virtual bool isWorkaroundRequired(uint32_t lowestSteppingWithBug, uint32_t steppingWithFix, const HardwareInfo &hwInfo) const = 0;
|
|
virtual bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const = 0;
|
|
virtual bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const = 0;
|
|
virtual uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const = 0;
|
|
virtual uint32_t getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset) const = 0;
|
|
virtual void setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const HardwareInfo &hwInfo) const = 0;
|
|
virtual bool isBankOverrideRequired(const HardwareInfo &hwInfo) const = 0;
|
|
virtual uint32_t getGlobalTimeStampBits() const = 0;
|
|
virtual int32_t getDefaultThreadArbitrationPolicy() const = 0;
|
|
virtual bool useOnlyGlobalTimestamps() const = 0;
|
|
virtual bool useSystemMemoryPlacementForISA(const HardwareInfo &hwInfo) const = 0;
|
|
virtual bool packedFormatsSupported() const = 0;
|
|
virtual bool isAssignEngineRoundRobinSupported(const HardwareInfo &hwInfo) const = 0;
|
|
virtual bool isRcsAvailable(const HardwareInfo &hwInfo) const = 0;
|
|
virtual bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const = 0;
|
|
virtual uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
|
|
const HardwareInfo &hwInfo, bool isEngineInstanced) const = 0;
|
|
virtual size_t getMaxFillPaternSizeForCopyEngine() const = 0;
|
|
virtual size_t getSipKernelMaxDbgSurfaceSize(const HardwareInfo &hwInfo) const = 0;
|
|
virtual bool isSipWANeeded(const HardwareInfo &hwInfo) const = 0;
|
|
virtual bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const = 0;
|
|
virtual bool isKmdMigrationSupported(const HardwareInfo &hwInfo) const = 0;
|
|
virtual aub_stream::MMIOList getExtraMmioList(const HardwareInfo &hwInfo, const GmmHelper &gmmHelper) const = 0;
|
|
virtual uint32_t getDefaultRevisionId(const HardwareInfo &hwInfo) const = 0;
|
|
virtual uint32_t getNumCacheRegions() const = 0;
|
|
virtual bool isSubDeviceEngineSupported(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield, aub_stream::EngineType engineType) const = 0;
|
|
virtual uint32_t getPlanarYuvMaxHeight() const = 0;
|
|
virtual size_t getPreemptionAllocationAlignment() const = 0;
|
|
virtual std::unique_ptr<TagAllocatorBase> createTimestampPacketAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, MemoryManager *memoryManager,
|
|
size_t initialTagCount, CommandStreamReceiverType csrType,
|
|
DeviceBitfield deviceBitfield) const = 0;
|
|
virtual size_t getTimestampPacketAllocatorAlignment() const = 0;
|
|
virtual size_t getSingleTimestampPacketSize() const = 0;
|
|
virtual void applyAdditionalCompressionSettings(Gmm &gmm, bool isNotCompressed) const = 0;
|
|
virtual void applyRenderCompressionFlag(Gmm &gmm, uint32_t isCompressed) const = 0;
|
|
virtual bool unTypedDataPortCacheFlushRequired() const = 0;
|
|
virtual bool isEngineTypeRemappingToHwSpecificRequired() const = 0;
|
|
|
|
static uint32_t getSubDevicesCount(const HardwareInfo *pHwInfo);
|
|
|
|
virtual bool isSipKernelAsHexadecimalArrayPreferred() const = 0;
|
|
virtual void setSipKernelData(uint32_t *&sipKernelBinary, size_t &kernelBinarySize) const = 0;
|
|
virtual void adjustPreemptionSurfaceSize(size_t &csrSize) const = 0;
|
|
virtual size_t getSamplerStateSize() const = 0;
|
|
virtual bool preferInternalBcsEngine() const = 0;
|
|
virtual bool isScratchSpaceSurfaceStateAccessible() const = 0;
|
|
virtual uint64_t getRenderSurfaceStateBaseAddress(void *renderSurfaceState) const = 0;
|
|
virtual uint32_t getRenderSurfaceStatePitch(void *renderSurfaceState) const = 0;
|
|
virtual size_t getMax3dImageWidthOrHeight() const = 0;
|
|
virtual uint64_t getMaxMemAllocSize() const = 0;
|
|
virtual uint64_t getPatIndex(CacheRegion cacheRegion, CachePolicy cachePolicy) const = 0;
|
|
virtual bool isStatelessToStatefulWithOffsetSupported() const = 0;
|
|
virtual void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) = 0;
|
|
virtual bool disableL3CacheForDebug(const HardwareInfo &hwInfo) const = 0;
|
|
virtual bool isRevisionSpecificBinaryBuiltinRequired() const = 0;
|
|
virtual bool forceNonGpuCoherencyWA(bool requiresCoherency) const = 0;
|
|
virtual bool platformSupportsImplicitScaling(const NEO::HardwareInfo &hwInfo) const = 0;
|
|
virtual size_t getBatchBufferEndSize() const = 0;
|
|
virtual const void *getBatchBufferEndReference() const = 0;
|
|
virtual bool isPlatformFlushTaskEnabled(const NEO::HardwareInfo &hwInfo) const = 0;
|
|
virtual uint32_t getMinimalScratchSpaceSize() const = 0;
|
|
virtual bool copyThroughLockedPtrEnabled() const = 0;
|
|
virtual uint32_t getAmountOfAllocationsToFill() const = 0;
|
|
virtual bool isChipsetUniqueUUIDSupported() const = 0;
|
|
virtual bool isTimestampShiftRequired() const = 0;
|
|
|
|
protected:
|
|
HwHelper() = default;
|
|
};
|
|
|
|
template <typename GfxFamily>
|
|
class HwHelperHw : public HwHelper {
|
|
public:
|
|
static HwHelperHw<GfxFamily> &get() {
|
|
static HwHelperHw<GfxFamily> hwHelper;
|
|
return hwHelper;
|
|
}
|
|
|
|
size_t getRenderSurfaceStateSize() const override {
|
|
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
|
|
return sizeof(RENDER_SURFACE_STATE);
|
|
}
|
|
|
|
size_t getSamplerStateSize() const override {
|
|
using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
|
|
return sizeof(SAMPLER_STATE);
|
|
}
|
|
|
|
uint32_t getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset) const override {
|
|
using DataPortBindlessSurfaceExtendedMessageDescriptor = typename GfxFamily::DataPortBindlessSurfaceExtendedMessageDescriptor;
|
|
DataPortBindlessSurfaceExtendedMessageDescriptor messageExtDescriptor = {};
|
|
messageExtDescriptor.setBindlessSurfaceOffset(surfStateOffset);
|
|
return messageExtDescriptor.getBindlessSurfaceOffsetToPatch();
|
|
}
|
|
|
|
uint64_t getRenderSurfaceStateBaseAddress(void *renderSurfaceState) const override {
|
|
return reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(renderSurfaceState)->getSurfaceBaseAddress();
|
|
}
|
|
|
|
uint32_t getRenderSurfaceStatePitch(void *renderSurfaceState) const override {
|
|
return reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(renderSurfaceState)->getSurfacePitch();
|
|
}
|
|
|
|
const AubMemDump::LrcaHelper &getCsTraits(aub_stream::EngineType engineType) const override;
|
|
|
|
size_t getMaxBarrierRegisterPerSlice() const override;
|
|
|
|
size_t getPaddingForISAAllocation() const override;
|
|
|
|
uint32_t getComputeUnitsUsedForScratch(const RootDeviceEnvironment &rootDeviceEnvironment) const override;
|
|
|
|
uint32_t getPitchAlignmentForImage(const RootDeviceEnvironment &rootDeviceEnvironment) const override;
|
|
|
|
uint32_t getMaxNumSamplers() const override;
|
|
|
|
void adjustDefaultEngineType(HardwareInfo *pHwInfo) override;
|
|
|
|
SipKernelType getSipKernelType(bool debuggingActive) const override;
|
|
|
|
bool isLocalMemoryEnabled(const HardwareInfo &hwInfo) const override;
|
|
|
|
bool hvAlign4Required() const override;
|
|
|
|
bool isBufferSizeSuitableForCompression(const size_t size, const HardwareInfo &hwInfo) const override;
|
|
|
|
bool checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) override;
|
|
|
|
bool timestampPacketWriteSupported() const override;
|
|
|
|
bool isTimestampWaitSupportedForQueues() const override;
|
|
bool isUpdateTaskCountFromWaitSupported() const override;
|
|
|
|
bool is1MbAlignmentSupported(const HardwareInfo &hwInfo, bool isCompressionEnabled) const override;
|
|
|
|
bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const override;
|
|
|
|
void setRenderSurfaceStateForScratchResource(const RootDeviceEnvironment &rootDeviceEnvironment,
|
|
void *surfaceStateBuffer,
|
|
size_t bufferSize,
|
|
uint64_t gpuVa,
|
|
size_t offset,
|
|
uint32_t pitch,
|
|
GraphicsAllocation *gfxAlloc,
|
|
bool isReadOnly,
|
|
uint32_t surfaceType,
|
|
bool forceNonAuxMode,
|
|
bool useL1Cache) override;
|
|
|
|
MOCKABLE_VIRTUAL void setL1CachePolicy(bool useL1Cache, typename GfxFamily::RENDER_SURFACE_STATE *surfaceState, const HardwareInfo *hwInfo);
|
|
|
|
const EngineInstancesContainer getGpgpuEngineInstances(const HardwareInfo &hwInfo) const override;
|
|
|
|
EngineGroupType getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const override;
|
|
|
|
const StackVec<size_t, 3> getDeviceSubGroupSizes() const override;
|
|
|
|
const StackVec<uint32_t, 6> getThreadsPerEUConfigs() const override;
|
|
|
|
bool getEnableLocalMemory(const HardwareInfo &hwInfo) const override;
|
|
|
|
std::string getExtensions(const HardwareInfo &hwInfo) const override;
|
|
|
|
uint32_t getMetricsLibraryGenId() const override;
|
|
|
|
uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const override;
|
|
|
|
bool isLinearStoragePreferred(bool isSharedContext, bool isImage1d, bool forceLinearStorage) override;
|
|
|
|
uint8_t getBarriersCountFromHasBarriers(uint8_t hasBarriers) const override;
|
|
|
|
uint32_t calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) override;
|
|
|
|
uint32_t alignSlmSize(uint32_t slmSize) override;
|
|
|
|
uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) override;
|
|
|
|
static AuxTranslationMode getAuxTranslationMode(const HardwareInfo &hwInfo);
|
|
|
|
bool isWorkaroundRequired(uint32_t lowestSteppingWithBug, uint32_t steppingWithFix, const HardwareInfo &hwInfo) const override;
|
|
|
|
bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const override;
|
|
|
|
bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const override;
|
|
|
|
static bool isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo);
|
|
|
|
bool isWaDisableRccRhwoOptimizationRequired() const override;
|
|
|
|
bool isAdditionalFeatureFlagRequired(const FeatureTable *featureTable) const override;
|
|
|
|
uint32_t getMinimalSIMDSize() override;
|
|
|
|
uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const override;
|
|
|
|
uint32_t getGlobalTimeStampBits() const override;
|
|
|
|
void setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const HardwareInfo &hwInfo) const override;
|
|
|
|
bool isBankOverrideRequired(const HardwareInfo &hwInfo) const override;
|
|
|
|
int32_t getDefaultThreadArbitrationPolicy() const override;
|
|
|
|
bool useOnlyGlobalTimestamps() const override;
|
|
|
|
bool useSystemMemoryPlacementForISA(const HardwareInfo &hwInfo) const override;
|
|
|
|
bool packedFormatsSupported() const override;
|
|
|
|
bool isRcsAvailable(const HardwareInfo &hwInfo) const override;
|
|
|
|
bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const override;
|
|
|
|
uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
|
|
const HardwareInfo &hwInfo, bool isEngineInstanced) const override;
|
|
|
|
size_t getMaxFillPaternSizeForCopyEngine() const override;
|
|
|
|
size_t getSipKernelMaxDbgSurfaceSize(const HardwareInfo &hwInfo) const override;
|
|
|
|
bool isKmdMigrationSupported(const HardwareInfo &hwInfo) const override;
|
|
|
|
bool isSipWANeeded(const HardwareInfo &hwInfo) const override;
|
|
|
|
bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const override;
|
|
|
|
aub_stream::MMIOList getExtraMmioList(const HardwareInfo &hwInfo, const GmmHelper &gmmHelper) const override;
|
|
|
|
uint32_t getDefaultRevisionId(const HardwareInfo &hwInfo) const override;
|
|
|
|
uint32_t getNumCacheRegions() const override;
|
|
|
|
bool isSubDeviceEngineSupported(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield, aub_stream::EngineType engineType) const override;
|
|
|
|
uint32_t getPlanarYuvMaxHeight() const override;
|
|
|
|
size_t getPreemptionAllocationAlignment() const override;
|
|
|
|
std::unique_ptr<TagAllocatorBase> createTimestampPacketAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, MemoryManager *memoryManager,
|
|
size_t initialTagCount, CommandStreamReceiverType csrType,
|
|
DeviceBitfield deviceBitfield) const override;
|
|
size_t getTimestampPacketAllocatorAlignment() const override;
|
|
|
|
size_t getSingleTimestampPacketSize() const override;
|
|
static size_t getSingleTimestampPacketSizeHw();
|
|
|
|
void applyAdditionalCompressionSettings(Gmm &gmm, bool isNotCompressed) const override;
|
|
|
|
bool preferSmallWorkgroupSizeForKernel(const size_t size, const HardwareInfo &hwInfo) const override;
|
|
|
|
void applyRenderCompressionFlag(Gmm &gmm, uint32_t isCompressed) const override;
|
|
|
|
bool unTypedDataPortCacheFlushRequired() const override;
|
|
|
|
bool isAssignEngineRoundRobinSupported(const HardwareInfo &hwInfo) const override;
|
|
|
|
bool isEngineTypeRemappingToHwSpecificRequired() const override;
|
|
|
|
bool isSipKernelAsHexadecimalArrayPreferred() const override;
|
|
|
|
void setSipKernelData(uint32_t *&sipKernelBinary, size_t &kernelBinarySize) const override;
|
|
|
|
void adjustPreemptionSurfaceSize(size_t &csrSize) const override;
|
|
|
|
bool isScratchSpaceSurfaceStateAccessible() const override;
|
|
bool preferInternalBcsEngine() const override;
|
|
size_t getMax3dImageWidthOrHeight() const override;
|
|
uint64_t getMaxMemAllocSize() const override;
|
|
uint64_t getPatIndex(CacheRegion cacheRegion, CachePolicy cachePolicy) const override;
|
|
bool isStatelessToStatefulWithOffsetSupported() const override;
|
|
void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) override;
|
|
bool disableL3CacheForDebug(const HardwareInfo &hwInfo) const override;
|
|
bool isRevisionSpecificBinaryBuiltinRequired() const override;
|
|
bool forceNonGpuCoherencyWA(bool requiresCoherency) const override;
|
|
bool platformSupportsImplicitScaling(const NEO::HardwareInfo &hwInfo) const override;
|
|
size_t getBatchBufferEndSize() const override;
|
|
const void *getBatchBufferEndReference() const override;
|
|
bool isPlatformFlushTaskEnabled(const NEO::HardwareInfo &hwInfo) const override;
|
|
uint32_t getMinimalScratchSpaceSize() const override;
|
|
bool copyThroughLockedPtrEnabled() const override;
|
|
uint32_t getAmountOfAllocationsToFill() const override;
|
|
bool isChipsetUniqueUUIDSupported() const override;
|
|
bool isTimestampShiftRequired() const override;
|
|
|
|
protected:
|
|
static const AuxTranslationMode defaultAuxTranslationMode;
|
|
HwHelperHw() = default;
|
|
};
|
|
|
|
/**
 * Helper that composes a 32-bit value by OR-ing individual bits into an
 * initial value.
 */
struct DwordBuilder {
    /**
     * Builds a dword from initValue by optionally setting a bit and its
     * companion bit located 16 positions higher.
     *
     * @param bitNumberToSet index of the bit to operate on
     * @param masked         when true, bit (bitNumberToSet + 16) is OR-ed in
     * @param set            when true, bit bitNumberToSet is OR-ed in
     * @param initValue      starting value; none of its bits are cleared
     * @return initValue with the requested bits set
     *
     * Bit positions that do not fit in 32 bits are ignored: a shift by 32 or
     * more is undefined behaviour, so bitNumberToSet >= 32 contributes
     * nothing for `set`, and bitNumberToSet >= 16 contributes nothing for
     * `masked`.
     */
    static uint32_t build(uint32_t bitNumberToSet, bool masked, bool set = true, uint32_t initValue = 0) {
        constexpr uint32_t dwordBits = 32u;
        constexpr uint32_t maskBitOffset = 16u;

        uint32_t dword = initValue;
        // Shifts are done on an unsigned literal so bit 31 never touches a
        // signed sign bit.
        if (set && bitNumberToSet < dwordBits) {
            dword |= (1u << bitNumberToSet);
        }
        // Comparing against (32 - 16) instead of adding first also avoids
        // wrap-around of bitNumberToSet + 16 for very large inputs.
        if (masked && bitNumberToSet < dwordBits - maskBitOffset) {
            dword |= (1u << (bitNumberToSet + maskBitOffset));
        }
        return dword;
    }
};
|
|
|
|
template <typename GfxFamily>
|
|
struct LriHelper {
|
|
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
|
|
|
static void program(LinearStream *cmdStream, uint32_t address, uint32_t value, bool remap);
|
|
};
|
|
|
|
template <typename GfxFamily>
|
|
struct MemorySynchronizationCommands {
|
|
static void addSingleBarrier(LinearStream &commandStream, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, PipeControlArgs &args);
|
|
static void setSingleBarrier(void *commandsBuffer, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, PipeControlArgs &args);
|
|
static void addSingleBarrier(LinearStream &commandStream, PipeControlArgs &args);
|
|
static void setSingleBarrier(void *commandsBuffer, PipeControlArgs &args);
|
|
|
|
static void addBarrierWithPostSyncOperation(LinearStream &commandStream, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, const HardwareInfo &hwInfo, PipeControlArgs &args);
|
|
static void setBarrierWithPostSyncOperation(void *&commandsBuffer, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, const HardwareInfo &hwInfo, PipeControlArgs &args);
|
|
|
|
static void setPostSyncExtraProperties(PipeControlArgs &args, const HardwareInfo &hwInfo);
|
|
|
|
static void addBarrierWa(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo);
|
|
static void setBarrierWa(void *&commandsBuffer, uint64_t gpuAddress, const HardwareInfo &hwInfo);
|
|
|
|
static void setBarrierWaFlags(void *barrierCmd);
|
|
|
|
static void addAdditionalSynchronizationForDirectSubmission(LinearStream &commandStream, uint64_t gpuAddress, bool acquire, const HardwareInfo &hwInfo);
|
|
static void addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, bool acquire, const HardwareInfo &hwInfo);
|
|
static void setAdditionalSynchronization(void *&commandsBuffer, uint64_t gpuAddress, bool acquire, const HardwareInfo &hwInfo);
|
|
|
|
static bool getDcFlushEnable(bool isFlushPreferred, const HardwareInfo &hwInfo);
|
|
|
|
static void addFullCacheFlush(LinearStream &commandStream, const HardwareInfo &hwInfo);
|
|
static void setCacheFlushExtraProperties(PipeControlArgs &args);
|
|
|
|
static size_t getSizeForBarrierWithPostSyncOperation(const HardwareInfo &hwInfo, bool tlbInvalidationRequired);
|
|
static size_t getSizeForBarrierWa(const HardwareInfo &hwInfo);
|
|
static size_t getSizeForSingleBarrier(bool tlbInvalidationRequired);
|
|
static size_t getSizeForSingleAdditionalSynchronizationForDirectSubmission(const HardwareInfo &hwInfo);
|
|
static size_t getSizeForSingleAdditionalSynchronization(const HardwareInfo &hwInfo);
|
|
static size_t getSizeForAdditonalSynchronization(const HardwareInfo &hwInfo);
|
|
static size_t getSizeForFullCacheFlush();
|
|
|
|
static bool isBarrierWaRequired(const HardwareInfo &hwInfo);
|
|
static bool isBarrierlPriorToPipelineSelectWaRequired(const HardwareInfo &hwInfo);
|
|
static void setBarrierExtraProperties(void *barrierCmd, PipeControlArgs &args);
|
|
};
|
|
|
|
union SURFACE_STATE_BUFFER_LENGTH {
|
|
uint32_t Length;
|
|
struct SurfaceState {
|
|
uint32_t Width : BITFIELD_RANGE(0, 6);
|
|
uint32_t Height : BITFIELD_RANGE(7, 20);
|
|
uint32_t Depth : BITFIELD_RANGE(21, 31);
|
|
} SurfaceState;
|
|
};
|
|
|
|
} // namespace NEO
|