/*
 * Copyright (C) 2019-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/gen12lp/hw_cmds.h"

// The .inl files included below are written against the `Family` alias,
// so it has to be declared before they are pulled in.
using Family = NEO::TGLLPFamily;

#include "shared/source/helpers/flat_batch_buffer_helper_hw.inl"
#include "shared/source/helpers/hw_helper_bdw_plus.inl"
#include "shared/source/helpers/hw_helper_tgllp_plus.inl"

#include "opencl/source/aub/aub_helper_bdw_plus.inl"
#include "opencl/source/gen12lp/helpers_gen12lp.h"

#include "engine_node.h"

namespace NEO {

// Fills `caps` with the Gen12LP limits: 3D image height/width and the
// maximum single allocation size. `hwInfo` is not consulted here.
template <>
void HwHelperHw<Family>::setupHardwareCapabilities(HardwareCapabilities *caps, const HardwareInfo &hwInfo) {
    caps->image3DMaxHeight = 2048;
    caps->image3DMaxWidth = 2048;
    // With stateful messages we have an allocation cap of 4GB.
    // 8KB is subtracted because the driver may pad the buffer with
    // additional pages for over-fetching.
    caps->maxMemAllocSize = (4ULL * MemoryConstants::gigaByte) - (8ULL * MemoryConstants::kiloByte);
    caps->isStatelesToStatefullWithOffsetSupported = true;
}

// Forwards the decision to Gen12LPHelpers.
template <>
bool HwHelperHw<Family>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const {
    return Gen12LPHelpers::isOffsetToSkipSetFFIDGPWARequired(hwInfo);
}

// Forwards the decision to Gen12LPHelpers.
template <>
bool HwHelperHw<Family>::is3DPipelineSelectWARequired(const HardwareInfo &hwInfo) const {
    return Gen12LPHelpers::is3DPipelineSelectWARequired(hwInfo);
}

// Forwards the decision to Gen12LPHelpers.
template <>
bool HwHelperHw<Family>::isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo) {
    return Gen12LPHelpers::isForceDefaultRCSEngineWARequired(hwInfo);
}

// Forwards the decision to Gen12LPHelpers.
template <>
bool HwHelperHw<Family>::isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) {
    return Gen12LPHelpers::isForceEmuInt32DivRemSPWARequired(hwInfo);
}

// Overrides the default engine type with RCS when the CCS node feature is
// absent or when the force-default-RCS workaround applies. Otherwise the
// capability table is left untouched.
template <>
void HwHelperHw<Family>::adjustDefaultEngineType(HardwareInfo *pHwInfo) {
    if (!pHwInfo->featureTable.ftrCCSNode || isForceDefaultRCSEngineWARequired(*pHwInfo)) {
        pHwInfo->capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS;
    }
}

// Returns MaxSubSlicesSupported * MaxEuPerSubSlice * 8.
template <>
uint32_t HwHelperHw<Family>::getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const {
    /* For ICL+ maxThreadCount equals (EUCount * 8).
     ThreadCount/EUCount=7 is no longer valid, so we have to force 8 in below formula.
     This is required to allocate enough scratch space. */
    return pHwInfo->gtSystemInfo.MaxSubSlicesSupported * pHwInfo->gtSystemInfo.MaxEuPerSubSlice * 8;
}

// Forwards the decision to Gen12LPHelpers.
template <>
bool HwHelperHw<Family>::isLocalMemoryEnabled(const HardwareInfo &hwInfo) const {
    return Gen12LPHelpers::isLocalMemoryEnabled(hwInfo);
}

// True when either render-compressed buffers or render-compressed images
// are flagged in the capability table.
template <>
bool HwHelperHw<Family>::isPageTableManagerSupported(const HardwareInfo &hwInfo) const {
    return hwInfo.capabilityTable.ftrRenderCompressedBuffers || hwInfo.capabilityTable.ftrRenderCompressedImages;
}

// Always returns false for this family, regardless of `hwInfo` and `size`.
template <>
bool HwHelperHw<Family>::obtainRenderBufferCompressionPreference(const HardwareInfo &hwInfo, const size_t size) const {
    return false;
}

// Returns false only for BUFFER_COMPRESSED allocations; every other
// allocation type is reported as compatible.
template <>
bool HwHelperHw<Family>::checkResourceCompatibility(GraphicsAllocation &graphicsAllocation) {
    return graphicsAllocation.getAllocationType() != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED;
}

// Writes the coherency capability into `coherencyFlag`: true by default,
// false on Tiger Lake LP with revision id 0, then handed to Gen12LPHelpers
// for a final product-specific adjustment.
template <>
void HwHelperHw<Family>::setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) {
    coherencyFlag = true;
    if (pHwInfo->platform.eProductFamily == IGFX_TIGERLAKE_LP && pHwInfo->platform.usRevId == 0x0) {
        // stepping A0 devices - turn off coherency
        coherencyFlag = false;
    }

    Gen12LPHelpers::adjustCoherencyFlag(pHwInfo->platform.eProductFamily, coherencyFlag);
}

// Returns 64 when the image pitch alignment workaround applies to the
// product family and the revision id is 0; returns 4 in every other case.
template <>
uint32_t HwHelperHw<Family>::getPitchAlignmentForImage(const HardwareInfo *hwInfo) {
    if (Gen12LPHelpers::imagePitchAlignmentWaRequired(hwInfo->platform.eProductFamily)) {
        auto stepping = hwInfo->platform.usRevId;
        if (stepping == 0) {
            return 64u;
        }
    }
    return 4u;
}

// Returns the metrics library's Gen12 client id converted to uint32_t.
template <>
uint32_t HwHelperHw<Family>::getMetricsLibraryGenId() const {
    return static_cast<uint32_t>(MetricsLibraryApi::ClientGen::Gen12);
}

// Builds the list of GPGPU engines: two RCS entries, the chosen default
// engine, and a CCS entry appended when the CCS node feature is present.
template <>
const HwHelper::EngineInstancesContainer HwHelperHw<Family>::getGpgpuEngineInstances(const HardwareInfo &hwInfo) const {
    auto defaultEngine = getChosenEngineType(hwInfo);

    EngineInstancesContainer engines = {
        aub_stream::ENGINE_RCS,
        aub_stream::ENGINE_RCS, // low priority
        defaultEngine           // internal usage
    };

    if (hwInfo.featureTable.ftrCCSNode) {
        engines.push_back(aub_stream::ENGINE_CCS);
    }

    return engines;
}

// Emits one extra PIPE_CONTROL with command streamer stall enabled into
// `commandStream`, but only when the workaround applies to the product
// family and the revision id is 0. `gpuAddress` is not used here.
template <>
void MemorySynchronizationCommands<Family>::addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) {
    if (Gen12LPHelpers::pipeControlWaRequired(hwInfo.platform.eProductFamily)) {
        auto stepping = hwInfo.platform.usRevId;
        if (stepping == 0) {
            auto pCmd = static_cast<Family::PIPE_CONTROL *>(commandStream.getSpace(sizeof(Family::PIPE_CONTROL)));
            // Start from the family's default-initialized command, then set the stall bit.
            *pCmd = Family::cmdInitPipeControl;
            pCmd->setCommandStreamerStallEnable(true);
        }
    }
}

// Returns the extension string contributed by this family. The trailing
// space is part of the returned value.
template <>
std::string HwHelperHw<Family>::getExtensions() const {
    return "cl_intel_subgroup_local_block_io ";
}

// Always returns false for this family.
template <>
bool HwHelperHw<Family>::isIndependentForwardProgressSupported() {
    return false;
}

// Sets the additional PIPE_CONTROL fields used for cache flushes: HDC
// pipeline flush on, constant cache invalidation off.
template <>
void MemorySynchronizationCommands<Family>::setExtraCacheFlushFields(Family::PIPE_CONTROL *pipeControl) {
    pipeControl->setHdcPipelineFlush(true);
    pipeControl->setConstantCacheInvalidationEnable(false);
}

// Explicit instantiations for this family.
template class AubHelperHw<Family>;
template class HwHelperHw<Family>;
template class FlatBatchBufferHelperHw<Family>;
template struct MemorySynchronizationCommands<Family>;
} // namespace NEO