/*
 * Copyright (C) 2019-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/aub_mem_dump/aub_mem_dump.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/preamble.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/os_interface/os_interface.h"

#include "pipe_control_args.h"

namespace NEO {

// NOTE(review): the template parameter lists and explicit template arguments
// in this file (e.g. "template <typename Family>", "HwHelperHw<Family>::",
// cast target types) had been stripped from the text and are reconstructed
// below. Spots where the original argument could not be derived from the
// surrounding code are flagged individually - confirm them against the
// declarations in hw_helper.h before merging.

// Aux translation mode used when no debug override is set (see getAuxTranslationMode).
template <typename Family>
const AuxTranslationMode HwHelperHw<Family>::defaultAuxTranslationMode = AuxTranslationMode::Builtin;

// A buffer qualifies for render compression only when it is strictly larger than KB bytes.
template <typename Family>
bool HwHelperHw<Family>::isBufferSizeSuitableForRenderCompression(const size_t size) const {
    return size > KB;
}

// Fills the generic capability limits into caps; hwInfo is not consulted by this base implementation.
template <typename Family>
void HwHelperHw<Family>::setupHardwareCapabilities(HardwareCapabilities *caps, const HardwareInfo &hwInfo) {
    caps->image3DMaxHeight = 16384;
    caps->image3DMaxWidth = 16384;
    // With stateful messages we have an allocation cap of 4GB.
    // Reason to subtract 8KB is that driver may pad the buffer with additional pages for over fetching.
    caps->maxMemAllocSize = (4ULL * MemoryConstants::gigaByte) - (8ULL * MemoryConstants::kiloByte);
    caps->isStatelesToStatefullWithOffsetSupported = true;
}

// Forwards the L3 configurability query to the preamble helper.
// NOTE(review): "<Family>" on PreambleHelper is reconstructed - confirm.
template <typename Family>
bool HwHelperHw<Family>::isL3Configurable(const HardwareInfo &hwInfo) {
    return PreambleHelper<Family>::isL3Configurable(hwInfo);
}

// Returns SipKernelType::DbgCsr when debugging is active, SipKernelType::Csr otherwise.
template <typename Family>
SipKernelType HwHelperHw<Family>::getSipKernelType(bool debuggingActive) const {
    if (!debuggingActive) {
        return SipKernelType::Csr;
    }
    return SipKernelType::DbgCsr;
}

// Base value: 32.
template <typename Family>
size_t HwHelperHw<Family>::getMaxBarrierRegisterPerSlice() const {
    return 32;
}

// Base value: 4; hwInfo is not consulted by this base implementation.
template <typename Family>
uint32_t HwHelperHw<Family>::getPitchAlignmentForImage(const HardwareInfo *hwInfo) const {
    return 4u;
}

// Base value: 16.
template <typename Family>
uint32_t HwHelperHw<Family>::getMaxNumSamplers() const {
    return 16;
}

// Looks up the command streamer traits for engineType in the AUB family mapper table.
// engineType is used directly as the table index; no bounds check is performed here.
// NOTE(review): "<Family>" on AUBFamilyMapper is reconstructed - confirm.
template <typename Family>
const AubMemDump::LrcaHelper &HwHelperHw<Family>::getCsTraits(aub_stream::EngineType engineType) const {
    return *AUBFamilyMapper<Family>::csTraits[engineType];
}

// Base implementation: always false.
template <typename Family>
bool HwHelperHw<Family>::isPageTableManagerSupported(const HardwareInfo &hwInfo) const {
    return false;
}

// Base implementation: always false.
template <typename Family>
bool HwHelperHw<Family>::isFenceAllocationRequired(const HardwareInfo &hwInfo) const {
    return false;
}

// Base implementation: every allocation is reported as compatible.
template <typename Family>
inline bool HwHelperHw<Family>::checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) {
    return true;
}

// Programs a RENDER_SURFACE_STATE describing a raw, linear buffer and stores it
// in surfaceStateBuffer, then lets setL1CachePolicy adjust the stored state.
//
//  bufferSize      - size in bytes; rounded up to a multiple of 4 before being
//                    encoded (minus one) into the width/height/depth fields
//  gpuVa           - base address, used only when gfxAlloc is nullptr
//  offset          - added to the base address taken from gfxAlloc or gpuVa
//  pitch           - programmed only when non-zero
//  gfxAlloc        - optional allocation; supplies GPU address, underlying size and Gmm
//  isReadOnly      - selects the regular buffer MOCS even for misaligned buffers
//  surfaceType     - raw value cast to the RENDER_SURFACE_STATE surface type enum
//  forceNonAuxMode - suppresses aux (CCS) programming even for a compressed Gmm
template <typename Family>
void HwHelperHw<Family>::setRenderSurfaceStateForBuffer(const RootDeviceEnvironment &rootDeviceEnvironment,
                                                        void *surfaceStateBuffer,
                                                        size_t bufferSize,
                                                        uint64_t gpuVa,
                                                        size_t offset,
                                                        uint32_t pitch,
                                                        GraphicsAllocation *gfxAlloc,
                                                        bool isReadOnly,
                                                        uint32_t surfaceType,
                                                        bool forceNonAuxMode,
                                                        bool useL1Cache) {
    using RENDER_SURFACE_STATE = typename Family::RENDER_SURFACE_STATE;
    using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT;
    using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;

    auto gmmHelper = rootDeviceEnvironment.getGmmHelper();
    auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateBuffer);

    // Work on a local copy and publish it with a single assignment at the end.
    RENDER_SURFACE_STATE state = Family::cmdInitRenderSurfaceState;
    auto surfaceSize = alignUp(bufferSize, 4);

    // NOTE(review): cast target reconstructed as uint32_t - confirm against the
    // type of SURFACE_STATE_BUFFER_LENGTH::Length.
    SURFACE_STATE_BUFFER_LENGTH Length = {0};
    Length.Length = static_cast<uint32_t>(surfaceSize - 1);

    state.setWidth(Length.SurfaceState.Width + 1);
    state.setHeight(Length.SurfaceState.Height + 1);
    state.setDepth(Length.SurfaceState.Depth + 1);
    if (pitch) {
        state.setSurfacePitch(pitch);
    }

    // The graphics allocation for Host Ptr surface will be created in makeResident call
    // and GPU address is expected to be the same as CPU address.
    auto bufferStateAddress = (gfxAlloc != nullptr) ? gfxAlloc->getGpuAddress() : gpuVa;
    bufferStateAddress += offset;

    auto bufferStateSize = (gfxAlloc != nullptr) ? gfxAlloc->getUnderlyingBufferSize() : bufferSize;

    // NOTE(review): cast target reconstructed as RENDER_SURFACE_STATE::SURFACE_TYPE - confirm.
    state.setSurfaceType(static_cast<typename RENDER_SURFACE_STATE::SURFACE_TYPE>(surfaceType));

    state.setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW);
    state.setSurfaceVerticalAlignment(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4);
    state.setSurfaceHorizontalAlignment(RENDER_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4);

    state.setTileMode(RENDER_SURFACE_STATE::TILE_MODE_LINEAR);
    state.setVerticalLineStride(0);
    state.setVerticalLineStrideOffset(0);

    // NOTE(review): the alignment argument of isAligned was lost; it is
    // reconstructed as MemoryConstants::cacheLineSize because the fallback
    // usage is named ..._CACHELINE_MISALIGNED - confirm.
    if ((isAligned<MemoryConstants::cacheLineSize>(bufferStateAddress) &&
         isAligned<MemoryConstants::cacheLineSize>(bufferStateSize)) ||
        isReadOnly) {
        state.setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER));
    } else {
        state.setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
    }

    state.setSurfaceBaseAddress(bufferStateAddress);

    Gmm *gmm = gfxAlloc ? gfxAlloc->getDefaultGmm() : nullptr;
    if (gmm && gmm->isRenderCompressed && !forceNonAuxMode) {
        // It is expected to not program pitch/qpitch/baseAddress for Aux surface in CCS scenarios.
        // NOTE(review): "<Family>" on EncodeSurfaceState is reconstructed - confirm.
        state.setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
        EncodeSurfaceState<Family>::setBufferAuxParamsForCCS(&state);
    } else {
        state.setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT);
        state.setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);
    }
    *surfaceState = state;

    setL1CachePolicy(useL1Cache, surfaceState, rootDeviceEnvironment.getHardwareInfo());
}

// Base implementation: intentionally empty, the surface state is left untouched.
template <typename GfxFamily>
void NEO::HwHelperHw<GfxFamily>::setL1CachePolicy(bool useL1Cache, typename GfxFamily::RENDER_SURFACE_STATE *surfaceState, const HardwareInfo *hwInfo) {}

// Decides whether local memory is enabled. Precedence:
//  1. EnableLocalMemory debug flag, when it is not -1
//  2. AUBDumpForceAllToLocalMemory debug flag forces true
//  3. OSInterface::osEnableLocalMemory combined with isLocalMemoryEnabled(hwInfo)
template <typename Family>
bool HwHelperHw<Family>::getEnableLocalMemory(const HardwareInfo &hwInfo) const {
    if (DebugManager.flags.EnableLocalMemory.get() != -1) {
        return DebugManager.flags.EnableLocalMemory.get();
    } else if (DebugManager.flags.AUBDumpForceAllToLocalMemory.get()) {
        return true;
    }

    return OSInterface::osEnableLocalMemory && isLocalMemoryEnabled(hwInfo);
}

// Returns the ForceAuxTranslationMode debug override when it is not -1,
// otherwise defaultAuxTranslationMode.
template <typename Family>
AuxTranslationMode HwHelperHw<Family>::getAuxTranslationMode() {
    if (DebugManager.flags.ForceAuxTranslationMode.get() != -1) {
        return static_cast<AuxTranslationMode>(DebugManager.flags.ForceAuxTranslationMode.get());
    }

    return HwHelperHw<Family>::defaultAuxTranslationMode;
}

// Emits, in order: the pipe control workaround (addPipeControlWA), a pipe control
// with the requested post-sync operation, and any additional synchronization.
// args is updated through setPostSyncExtraProperties before the pipe control is built.
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
    LinearStream &commandStream,
    POST_SYNC_OPERATION operation,
    uint64_t gpuAddress,
    uint64_t immediateData,
    const HardwareInfo &hwInfo,
    PipeControlArgs &args) {
    addPipeControlWA(commandStream, gpuAddress, hwInfo);

    setPostSyncExtraProperties(args, hwInfo);
    addPipeControlWithPostSync(commandStream, operation, gpuAddress, immediateData, args);

    MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, gpuAddress, hwInfo);
}

// Builds a PIPE_CONTROL from args with the given post-sync operation and target
// address and copies it into space obtained from commandStream.
// immediateData is programmed only for POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA.
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::addPipeControlWithPostSync(
    LinearStream &commandStream,
    POST_SYNC_OPERATION operation,
    uint64_t gpuAddress,
    uint64_t immediateData,
    PipeControlArgs &args) {
    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;

    PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl;
    setPipeControl(cmd, args);
    cmd.setPostSyncOperation(operation);
    // The address is split into its low and high 32 bits.
    // NOTE(review): cast targets reconstructed as uint32_t - confirm against the setters.
    cmd.setAddress(static_cast<uint32_t>(gpuAddress & 0x0000FFFFFFFFULL));
    cmd.setAddressHigh(static_cast<uint32_t>(gpuAddress >> 32));
    if (operation == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
        cmd.setImmediateData(immediateData);
    }

    PIPE_CONTROL *pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>();
    *pipeControl = cmd;
}

// Transfers the flush/invalidate selections from args into pipeControl.
// Command streamer stall is always enabled. The FlushAllCaches debug flag then
// forces every flush/invalidate bit on (including TLB invalidate); the
// DoNotFlushCaches debug flag is applied last and forces the flush/invalidate
// bits off, so it wins when both flags are set (TLB invalidate is not cleared).
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::setPipeControl(typename GfxFamily::PIPE_CONTROL &pipeControl, PipeControlArgs &args) {
    pipeControl.setCommandStreamerStallEnable(true);
    pipeControl.setDcFlushEnable(args.dcFlushEnable);
    pipeControl.setConstantCacheInvalidationEnable(args.constantCacheInvalidationEnable);
    pipeControl.setInstructionCacheInvalidateEnable(args.instructionCacheInvalidateEnable);
    pipeControl.setPipeControlFlushEnable(args.pipeControlFlushEnable);
    pipeControl.setRenderTargetCacheFlushEnable(args.renderTargetCacheFlushEnable);
    pipeControl.setStateCacheInvalidationEnable(args.stateCacheInvalidationEnable);
    pipeControl.setTextureCacheInvalidationEnable(args.textureCacheInvalidationEnable);
    pipeControl.setVfCacheInvalidationEnable(args.vfCacheInvalidationEnable);
    pipeControl.setGenericMediaStateClear(args.genericMediaStateClear);

    setPipeControlExtraProperties(pipeControl, args);

    if (DebugManager.flags.FlushAllCaches.get()) {
        pipeControl.setDcFlushEnable(true);
        pipeControl.setRenderTargetCacheFlushEnable(true);
        pipeControl.setInstructionCacheInvalidateEnable(true);
        pipeControl.setTextureCacheInvalidationEnable(true);
        pipeControl.setPipeControlFlushEnable(true);
        pipeControl.setVfCacheInvalidationEnable(true);
        pipeControl.setConstantCacheInvalidationEnable(true);
        pipeControl.setStateCacheInvalidationEnable(true);
        pipeControl.setTlbInvalidate(true);
    }
    if (DebugManager.flags.DoNotFlushCaches.get()) {
        pipeControl.setDcFlushEnable(false);
        pipeControl.setRenderTargetCacheFlushEnable(false);
        pipeControl.setInstructionCacheInvalidateEnable(false);
        pipeControl.setTextureCacheInvalidationEnable(false);
        pipeControl.setPipeControlFlushEnable(false);
        pipeControl.setVfCacheInvalidationEnable(false);
        pipeControl.setConstantCacheInvalidationEnable(false);
        pipeControl.setStateCacheInvalidationEnable(false);
    }
}

// Builds a PIPE_CONTROL from args (no post-sync operation) and appends it to commandStream.
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::addPipeControl(LinearStream &commandStream, PipeControlArgs &args) {
    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
    PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl;
    MemorySynchronizationCommands<GfxFamily>::setPipeControl(cmd, args);
    auto pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>();
    *pipeControl = cmd;
}

// Appends a PIPE_CONTROL with only the command streamer stall bit set on top of
// the init template; args is not consulted.
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::addPipeControlWithCSStallOnly(LinearStream &commandStream, PipeControlArgs &args) {
    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
    PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl;
    cmd.setCommandStreamerStallEnable(true);
    auto pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>();
    *pipeControl = cmd;
}

// Size in bytes of one PIPE_CONTROL command.
template <typename GfxFamily>
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl() {
    return sizeof(typename GfxFamily::PIPE_CONTROL);
}

// Size in bytes of a post-sync pipe control sequence: two pipe controls when the
// pipe control workaround is required, one otherwise, plus additional synchronization.
template <typename GfxFamily>
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(const HardwareInfo &hwInfo) {
    const auto pipeControlCount = MemorySynchronizationCommands<GfxFamily>::isPipeControlWArequired(hwInfo) ? 2u : 1u;
    return pipeControlCount * getSizeForSinglePipeControl() + getSizeForAdditonalSynchronization(hwInfo);
}

// Base implementation: no additional synchronization commands are emitted.
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) {
}

// Base implementation: 0 bytes.
template <typename GfxFamily>
inline size_t MemorySynchronizationCommands<GfxFamily>::getSizeForSingleSynchronization(const HardwareInfo &hwInfo) {
    return 0u;
}

// Base implementation: 0 bytes, matching the empty addAdditionalSynchronization.
template <typename GfxFamily>
inline size_t MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(const HardwareInfo &hwInfo) {
    return 0u;
}

// Base implementation reports the Gen9 metrics library client generation.
template <typename Family>
uint32_t HwHelperHw<Family>::getMetricsLibraryGenId() const {
    return static_cast<uint32_t>(MetricsLibraryApi::ClientGen::Gen9);
}

// Tiling is refused when the ForceLinearImages debug flag is set, linear storage
// is forced, or the context is shared; otherwise it is allowed for everything
// except 1D images.
template <typename Family>
bool HwHelperHw<Family>::tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) {
    if (DebugManager.flags.ForceLinearImages.get() || forceLinearStorage || isSharedContext) {
        return false;
    }
    return !isImage1d;
}

// Rounds a non-zero SLM size up to at least 1024 and then through
// Math::nextPowerOfTwo; 0 stays 0. Aborts via UNRECOVERABLE_IF when the result
// exceeds 64 * KB.
template <typename Family>
uint32_t HwHelperHw<Family>::alignSlmSize(uint32_t slmSize) {
    if (slmSize == 0u) {
        return 0u;
    }
    slmSize = std::max(slmSize, 1024u);
    slmSize = Math::nextPowerOfTwo(slmSize);
    UNRECOVERABLE_IF(slmSize > 64u * KB);
    return slmSize;
}

// Encodes an SLM size as Math::getMinLsbSet(nextPowerOfTwo(max(slmSize, 1024))) - 9,
// or 0 when slmSize is 0. hwInfo is not consulted by this base implementation.
// NOTE(review): presumably getMinLsbSet yields the index of the lowest set bit,
// giving 1 for 1KB up to 7 for 64KB - confirm against basic_math.h.
template <typename Family>
uint32_t HwHelperHw<Family>::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) {
    auto value = std::max(slmSize, 1024u);
    value = Math::nextPowerOfTwo(value);
    value = Math::getMinLsbSet(value);
    value = value - 9;
    DEBUG_BREAK_IF(value > 7);
    // !!slmSize is 0 or 1, so a zero request always encodes as 0.
    return value * !!slmSize;
}

// Base implementation: the value is passed through unchanged.
template <typename Family>
uint32_t HwHelperHw<Family>::getBarriersCountFromHasBarriers(uint32_t hasBarriers) {
    return hasBarriers;
}

// Base implementation: always false.
template <typename Family>
inline bool HwHelperHw<Family>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const {
    return false;
}

// Base implementation: no stepping mapping is known, so invalidStepping is returned.
template <typename Family>
uint32_t HwHelperHw<Family>::getHwRevIdFromStepping(uint32_t stepping, const HardwareInfo &hwInfo) const {
    return CommonConstants::invalidStepping;
}

// Base implementation: no revision mapping is known, so invalidStepping is returned.
template <typename Family>
uint32_t HwHelperHw<Family>::getSteppingFromHwRevId(uint32_t hwRevId, const HardwareInfo &hwInfo) const {
    return CommonConstants::invalidStepping;
}

// Returns true when the platform revision id lies in the half-open range
// [revId(lowestSteppingWithBug), revId(steppingWithFix)). Returns false when
// either stepping cannot be translated by getHwRevIdFromStepping.
template <typename Family>
bool HwHelperHw<Family>::isWorkaroundRequired(uint32_t lowestSteppingWithBug, uint32_t steppingWithFix, const HardwareInfo &hwInfo) const {
    auto lowestHwRevIdWithBug = getHwRevIdFromStepping(lowestSteppingWithBug, hwInfo);
    auto hwRevIdWithFix = getHwRevIdFromStepping(steppingWithFix, hwInfo);
    if ((lowestHwRevIdWithBug == CommonConstants::invalidStepping) || (hwRevIdWithFix == CommonConstants::invalidStepping)) {
        return false;
    }
    return (lowestHwRevIdWithBug <= hwInfo.platform.usRevId && hwInfo.platform.usRevId < hwRevIdWithFix);
}

// Base implementation: always false.
template <typename Family>
bool HwHelperHw<Family>::is3DPipelineSelectWARequired(const HardwareInfo &hwInfo) const {
    return false;
}

// Base implementation: always false.
template <typename Family>
bool HwHelperHw<Family>::isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo) {
    return false;
}

// Base implementation: always false.
template <typename Family>
bool HwHelperHw<Family>::isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) {
    return false;
}

// Base implementation: always false.
template <typename Family>
bool HwHelperHw<Family>::isWaDisableRccRhwoOptimizationRequired() const {
    return false;
}

// Base value: 8.
template <typename Family>
inline uint32_t HwHelperHw<Family>::getMinimalSIMDSize() {
    return 8u;
}

// Delegates to the HwHelper implementation.
// NOTE(review): HwHelper is left without template arguments on the assumption
// that it is the non-template base class - confirm.
template <typename Family>
uint32_t HwHelperHw<Family>::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
    return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice);
}

// Base implementation: always false.
template <typename Family>
inline bool HwHelperHw<Family>::isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const {
    return false;
}

// Base implementation: always true.
template <typename Family>
inline bool HwHelperHw<Family>::allowRenderCompression(const HardwareInfo &hwInfo) const {
    return true;
}

// A blit copy is required only when all three hold: the allocation lives in a
// local memory pool, CPU access to local memory is disallowed, and the
// capability table reports blitter operations as supported.
template <typename Family>
inline bool HwHelperHw<Family>::isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const {
    return allocation.isAllocatedInLocalMemoryPool() &&
           (getLocalMemoryAccessMode(hwInfo) == LocalMemoryAccessMode::CpuAccessDisallowed) &&
           hwInfo.capabilityTable.blitterOperationsSupported;
}

// Base implementation: always false.
template <typename Family>
inline bool HwHelperHw<Family>::forceBlitterUseForGlobalBuffers(const HardwareInfo &hwInfo, GraphicsAllocation *allocation) const {
    return false;
}

// Base implementation: always false.
template <typename Family>
bool HwHelperHw<Family>::additionalKernelExecInfoSupported(const HardwareInfo &hwInfo) const {
    return false;
}
template LocalMemoryAccessMode HwHelperHw::getLocalMemoryAccessMode(const HardwareInfo &hwInfo) const { switch (static_cast(DebugManager.flags.ForceLocalMemoryAccessMode.get())) { case LocalMemoryAccessMode::Default: case LocalMemoryAccessMode::CpuAccessAllowed: case LocalMemoryAccessMode::CpuAccessDisallowed: return static_cast(DebugManager.flags.ForceLocalMemoryAccessMode.get()); } return getDefaultLocalMemoryAccessMode(hwInfo); } template inline LocalMemoryAccessMode HwHelperHw::getDefaultLocalMemoryAccessMode(const HardwareInfo &hwInfo) const { return LocalMemoryAccessMode::Default; } template size_t MemorySynchronizationCommands::getSizeForFullCacheFlush() { return sizeof(typename GfxFamily::PIPE_CONTROL); } template void MemorySynchronizationCommands::addFullCacheFlush(LinearStream &commandStream) { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; PIPE_CONTROL *pipeControl = commandStream.getSpaceForCmd(); PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; PipeControlArgs args(true); args.renderTargetCacheFlushEnable = true; args.instructionCacheInvalidateEnable = true; args.textureCacheInvalidationEnable = true; args.pipeControlFlushEnable = true; args.constantCacheInvalidationEnable = true; args.stateCacheInvalidationEnable = true; MemorySynchronizationCommands::setCacheFlushExtraProperties(args); MemorySynchronizationCommands::setPipeControl(cmd, args); *pipeControl = cmd; } template const StackVec HwHelperHw::getDeviceSubGroupSizes() const { return {8, 16, 32}; } template const StackVec HwHelperHw::getThreadsPerEUConfigs() const { return {}; } template void HwHelperHw::setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const HardwareInfo &hwInfo) const {} template bool HwHelperHw::isBankOverrideRequired(const HardwareInfo &hwInfo) const { return false; } template uint32_t HwHelperHw::getDefaultThreadArbitrationPolicy() const { return 0; } template bool HwHelperHw::useOnlyGlobalTimestamps() const { return 
false; } template bool HwHelperHw::useSystemMemoryPlacementForISA(const HardwareInfo &hwInfo) const { return !getEnableLocalMemory(hwInfo); } template bool HwHelperHw::isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const { return false; } template bool MemorySynchronizationCommands::isPipeControlPriorToPipelineSelectWArequired(const HardwareInfo &hwInfo) { return false; } template bool HwHelperHw::isCooperativeDispatchSupported(const aub_stream::EngineType engine, const PRODUCT_FAMILY productFamily) const { return true; } template bool HwHelperHw::isMediaBlockIOSupported(const HardwareInfo &hwInfo) const { return hwInfo.capabilityTable.supportsImages; } template bool HwHelperHw::isKmdMigrationSupported(const HardwareInfo &hwInfo) const { return false; } template bool HwHelperHw::isCopyOnlyEngineType(EngineGroupType type) const { return NEO::EngineGroupType::Copy == type; } template void HwHelperHw::adjustAddressWidthForCanonize(uint32_t &addressWidth) const { } template bool HwHelperHw::isSipWANeeded(const HardwareInfo &hwInfo) const { return false; } template bool HwHelperHw::isAdditionalFeatureFlagRequired(const FeatureTable *featureTable) const { return false; } template uint32_t HwHelperHw::getDefaultRevisionId(const HardwareInfo &hwInfo) const { return 0u; } } // namespace NEO