diff --git a/cmake/fill_gens.cmake b/cmake/fill_gens.cmake index 5266e264b9..21bf15dffb 100644 --- a/cmake/fill_gens.cmake +++ b/cmake/fill_gens.cmake @@ -1,5 +1,5 @@ # -# Copyright (C) 2020 Intel Corporation +# Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -8,3 +8,5 @@ list(APPEND ALL_GEN_TYPES "GEN8") list(APPEND ALL_GEN_TYPES "GEN9") list(APPEND ALL_GEN_TYPES "GEN11") list(APPEND ALL_GEN_TYPES "GEN12LP") +list(APPEND ALL_GEN_TYPES "XE_HP_CORE") +list(APPEND XEHP_PLUS_GENS "XE_HP_CORE") diff --git a/cmake/setup_platform_flags.cmake b/cmake/setup_platform_flags.cmake index 850e753ebc..30a33e6267 100644 --- a/cmake/setup_platform_flags.cmake +++ b/cmake/setup_platform_flags.cmake @@ -1,5 +1,5 @@ # -# Copyright (C) 2020 Intel Corporation +# Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -9,6 +9,16 @@ SET_FLAGS_FOR("GEN9" "SKL" "KBL" "BXT" "GLK" "CFL") SET_FLAGS_FOR("GEN11" "ICLLP" "LKF" "EHL") SET_FLAGS_FOR("GEN12LP" "TGLLP" "RKL" "ADLS") +foreach(GEN_TYPE ${XEHP_PLUS_GENS}) + if(TESTS_${GEN_TYPE}) + set(TESTS_XEHP_PLUS 1) + endif() + if(SUPPORT_${GEN_TYPE}) + set(SUPPORT_XEHP_PLUS 1) + endif() + set(SUPPORT_DEVICE_ENQUEUE_${GEN_TYPE} FALSE CACHE BOOL "Disabled support ${GEN_TYPE} for device side enqueue" FORCE) +endforeach() + # Add supported and tested platforms if(SUPPORT_GEN8) set(CORE_GEN8_REVISIONS 0) @@ -179,3 +189,21 @@ if(SUPPORT_GEN12LP) endif() endif() endif() + +if(SUPPORT_XE_HP_CORE) + set(CORE_XE_HP_CORE_REVISIONS 0) + if(TESTS_XE_HP_CORE) + ADD_ITEM_FOR_GEN("FAMILY_NAME" "TESTED" "XE_HP_CORE" "XeHpFamily") + endif() + if(SUPPORT_XEHP) + ADD_PRODUCT("SUPPORTED" "XEHP" "IGFX_XE_HP_SDV") + ADD_PLATFORM_FOR_GEN("SUPPORTED" "XE_HP_CORE" "XEHP" "CORE") + ADD_PLATFORM_FOR_GEN("SUPPORTED_IMAGES" "XE_HP_CORE" "XEHP" "CORE") + set(PREFERRED_PLATFORM "XEHP") + if(TESTS_XEHP) + set(PREFERRED_FAMILY_NAME "XeHpFamily") + ADD_ITEM_FOR_GEN("PLATFORMS" "TESTED" "XE_HP_CORE" "XEHP") + 
ADD_PRODUCT("TESTED" "XEHP" "IGFX_XE_HP_SDV") + endif() + endif() +endif() diff --git a/level_zero/core/source/CMakeLists.txt b/level_zero/core/source/CMakeLists.txt index 3465744ce4..dbeaca5857 100644 --- a/level_zero/core/source/CMakeLists.txt +++ b/level_zero/core/source/CMakeLists.txt @@ -88,6 +88,13 @@ set(L0_RUNTIME_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/sampler/sampler_imp.h ) +if(SUPPORT_XEHP_PLUS) + list(APPEND L0_RUNTIME_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist/cmdlist_hw_xehp_plus.inl + ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue/cmdqueue_xe_hp_core_plus.inl + ) +endif() + target_include_directories(${L0_STATIC_LIB_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/ diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_plus.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_plus.inl new file mode 100644 index 0000000000..5c3a3c475d --- /dev/null +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_plus.inl @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/command_container/implicit_scaling.h" +#include "shared/source/command_stream/preemption.h" +#include "shared/source/helpers/cache_flush_xehp_plus.inl" +#include "shared/source/helpers/pipeline_select_helper.h" +#include "shared/source/helpers/simd_helper.h" +#include "shared/source/indirect_heap/indirect_heap.h" +#include "shared/source/kernel/grf_config.h" +#include "shared/source/memory_manager/memory_manager.h" +#include "shared/source/memory_manager/residency_container.h" +#include "shared/source/unified_memory/unified_memory.h" +#include "shared/source/utilities/software_tags_manager.h" +#include "shared/source/xe_hp_core/hw_cmds.h" +#include "shared/source/xe_hp_core/hw_info.h" + +#include "level_zero/core/source/cmdlist/cmdlist_hw.h" +#include "level_zero/core/source/kernel/kernel_imp.h" +#include "level_zero/core/source/module/module.h" + +#include "igfxfmid.h" + +namespace L0 { + +template +struct 
EncodeStateBaseAddress; + +template +size_t CommandListCoreFamily::getReserveSshSize() { + return 4 * MemoryConstants::pageSize; +} + +template +void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t numRanges, + const size_t *pRangeSizes, + const void **pRanges) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + + NEO::LinearStream *commandStream = commandContainer.getCommandStream(); + NEO::SVMAllocsManager *svmAllocsManager = + device->getDriverHandle()->getSvmAllocsManager(); + + StackVec subranges; + uint64_t postSyncAddressToFlush = 0; + for (uint32_t i = 0; i < numRanges; i++) { + const uint64_t pRange = reinterpret_cast(pRanges[i]); + size_t pRangeSize = pRangeSizes[i]; + uint64_t pFlushRange; + size_t pFlushRangeSize; + NEO::SvmAllocationData *allocData = + svmAllocsManager->getSVMAllocs()->get(pRanges[i]); + + if (allocData == nullptr || pRangeSize > allocData->size) { + continue; + } + + pFlushRange = pRange; + + if (NEO::L3Range::meetsMinimumAlignment(pRange) == false) { + pFlushRange = alignDown(pRange, MemoryConstants::pageSize); + } + pRangeSize = (pRange + pRangeSize) - pFlushRange; + pFlushRangeSize = pRangeSize; + if (NEO::L3Range::meetsMinimumAlignment(pRangeSize) == false) { + pFlushRangeSize = alignUp(pRangeSize, MemoryConstants::pageSize); + } + coverRangeExact(pFlushRange, + pFlushRangeSize, + subranges, + GfxFamily::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); + } + for (size_t subrangeNumber = 0; subrangeNumber < subranges.size(); subrangeNumber += NEO::maxFlushSubrangeCount) { + size_t rangeCount = subranges.size() <= subrangeNumber + NEO::maxFlushSubrangeCount ? 
subranges.size() - subrangeNumber : NEO::maxFlushSubrangeCount; + NEO::Range range = CreateRange(subranges.begin() + subrangeNumber, rangeCount); + + NEO::flushGpuCache(commandStream, range, postSyncAddressToFlush, device->getHwInfo()); + } +} + +template +ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(ze_kernel_handle_t hKernel, + const ze_group_count_t *pThreadGroupDimensions, + ze_event_handle_t hEvent, + bool isIndirect, + bool isPredicate, + bool isCooperative) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; + using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; + + const auto kernel = Kernel::fromHandle(hKernel); + UNRECOVERABLE_IF(kernel == nullptr); + const auto functionImmutableData = kernel->getImmutableData(); + auto &kernelDescriptor = kernel->getKernelDescriptor(); + commandListPerThreadScratchSize = std::max(commandListPerThreadScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]); + + auto functionPreemptionMode = obtainFunctionPreemptionMode(kernel); + commandListPreemptionMode = std::min(commandListPreemptionMode, functionPreemptionMode); + + kernel->patchGlobalOffset(); + + if (isIndirect && pThreadGroupDimensions) { + prepareIndirectParams(pThreadGroupDimensions); + } + if (!isIndirect) { + kernel->setGroupCount(pThreadGroupDimensions->groupCountX, + pThreadGroupDimensions->groupCountY, + pThreadGroupDimensions->groupCountZ); + } + NEO::GraphicsAllocation *eventAlloc = nullptr; + uint64_t eventAddress = 0; + bool isTimestampEvent = false; + bool L3FlushEnable = false; + if (hEvent) { + auto event = Event::fromHandle(hEvent); + eventAlloc = &event->getAllocation(this->device); + commandContainer.addToResidencyContainer(eventAlloc); + L3FlushEnable = (!event->signalScope) ? 
false : true; + isTimestampEvent = event->isEventTimestampFlagSet(); + eventAddress = event->getPacketAddress(this->device); + } + + if (kernel->hasIndirectAllocationsAllowed()) { + UnifiedMemoryControls unifiedMemoryControls = kernel->getUnifiedMemoryControls(); + + if (unifiedMemoryControls.indirectDeviceAllocationsAllowed) { + this->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true; + } + if (unifiedMemoryControls.indirectHostAllocationsAllowed) { + this->unifiedMemoryControls.indirectHostAllocationsAllowed = true; + } + if (unifiedMemoryControls.indirectSharedAllocationsAllowed) { + this->unifiedMemoryControls.indirectSharedAllocationsAllowed = true; + } + + this->indirectAllocationsAllowed = true; + } + + NEO::Device *neoDevice = device->getNEODevice(); + + if (NEO::DebugManager.flags.EnableSWTags.get()) { + neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( + *commandContainer.getCommandStream(), + *neoDevice, + kernelDescriptor.kernelMetadata.kernelName.c_str()); + } + + if (kernel->usesSyncBuffer()) { + auto retVal = (isCooperative + ? 
programSyncBuffer(*kernel, *neoDevice, pThreadGroupDimensions) + : ZE_RESULT_ERROR_INVALID_ARGUMENT); + if (retVal) { + return retVal; + } + } + + auto isMultiOsContextCapable = NEO::ImplicitScalingHelper::isImplicitScalingEnabled(device->getNEODevice()->getDeviceBitfield(), true); + updateStreamProperties(*kernel, isMultiOsContextCapable); + + KernelImp *kernelImp = static_cast(kernel); + this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs(); + uint32_t partitionCount = 0; + NEO::EncodeDispatchKernel::encode(commandContainer, + reinterpret_cast(pThreadGroupDimensions), + isIndirect, + isPredicate, + kernel, + eventAddress, + isTimestampEvent, + L3FlushEnable, + neoDevice, + commandListPreemptionMode, + this->containsStatelessUncachedResource, + kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, + partitionCount, + internalUsage); + if (hEvent) { + auto event = Event::fromHandle(hEvent); + if (isTimestampEvent && partitionCount > 1) { + event->setPacketsInUse(partitionCount); + } + } + + if (neoDevice->getDebugger()) { + auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE); + auto surfaceState = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh); + auto debugSurface = device->getDebugSurface(); + auto mocs = device->getMOCS(true, false); + NEO::EncodeSurfaceState::encodeBuffer(surfaceState, debugSurface->getGpuAddress(), + debugSurface->getUnderlyingBufferSize(), mocs, + false, false, false, neoDevice->getNumAvailableDevices(), + debugSurface, neoDevice->getGmmHelper(), + kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, 1u); + } + // Attach Function residency to our CommandList residency + { + commandContainer.addToResidencyContainer(functionImmutableData->getIsaGraphicsAllocation()); + auto &residencyContainer = kernel->getResidencyContainer(); + for (auto resource : residencyContainer) { + commandContainer.addToResidencyContainer(resource); + } + } + + // Store PrintfBuffer 
from a kernel + { + if (kernelDescriptor.kernelAttributes.flags.usesPrintf) { + storePrintfFunction(kernel); + } + } + + return ZE_RESULT_SUCCESS; +} + +} // namespace L0 diff --git a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_plus.inl b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_plus.inl new file mode 100644 index 0000000000..2d2f5c7edd --- /dev/null +++ b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_plus.inl @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_container/implicit_scaling.h" +#include "shared/source/command_stream/csr_definitions.h" +#include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/source/helpers/api_specific_config.h" +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/helpers/state_base_address.h" + +#include "level_zero/core/source/cmdqueue/cmdqueue_hw.h" + +#include "igfxfmid.h" +#include "pipe_control_args.h" + +namespace L0 { + +template +void CommandQueueHw::programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; + if (NEO::ApiSpecificConfig::getBindlessConfiguration()) { + NEO::Device *neoDevice = device->getNEODevice(); + auto globalHeapsBase = neoDevice->getBindlessHeapsHelper()->getGlobalHeapsBase(); + NEO::PipeControlArgs args(true); + NEO::MemorySynchronizationCommands::addPipeControl(commandStream, args); + auto pSbaCmd = static_cast(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS))); + STATE_BASE_ADDRESS sbaCmd; + bool multiOsContextCapable = NEO::ImplicitScalingHelper::isImplicitScalingEnabled(neoDevice->getDeviceBitfield(), true); + NEO::StateBaseAddressHelper::programStateBaseAddress(&sbaCmd, + nullptr, + nullptr, + nullptr, + 0, + true, + (device->getMOCS(true, false) >> 1), + 
neoDevice->getMemoryManager()->getInternalHeapBaseAddress(neoDevice->getRootDeviceIndex(), useLocalMemoryForIndirectHeap), + neoDevice->getMemoryManager()->getInternalHeapBaseAddress(neoDevice->getRootDeviceIndex(), neoDevice->getMemoryManager()->isLocalMemoryUsedForIsa(neoDevice->getRootDeviceIndex())), + globalHeapsBase, + true, + true, + neoDevice->getGmmHelper(), + multiOsContextCapable, + NEO::MemoryCompressionState::NotApplicable, + false, + 1u); + + if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) { + + NEO::Debugger::SbaAddresses sbaAddresses = {}; + sbaAddresses.BindlessSurfaceStateBaseAddress = sbaCmd.getBindlessSurfaceStateBaseAddress(); + sbaAddresses.DynamicStateBaseAddress = sbaCmd.getDynamicStateBaseAddress(); + sbaAddresses.GeneralStateBaseAddress = sbaCmd.getGeneralStateBaseAddress(); + sbaAddresses.IndirectObjectBaseAddress = sbaCmd.getIndirectObjectBaseAddress(); + sbaAddresses.InstructionBaseAddress = sbaCmd.getInstructionBaseAddress(); + sbaAddresses.SurfaceStateBaseAddress = sbaCmd.getSurfaceStateBaseAddress(); + + device->getL0Debugger()->programSbaTrackingCommands(commandStream, sbaAddresses); + } + *pSbaCmd = sbaCmd; + + auto heap = neoDevice->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::GLOBAL_SSH); + auto cmd = GfxFamily::cmdInitStateBindingTablePoolAlloc; + cmd.setBindingTablePoolBaseAddress(heap->getHeapGpuBase()); + cmd.setBindingTablePoolBufferSize(heap->getHeapSizeInPages()); + cmd.setSurfaceObjectControlStateIndexToMocsTables(neoDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER)); + + auto buffer = commandStream.getSpace(sizeof(cmd)); + *(typename GfxFamily::_3DSTATE_BINDING_TABLE_POOL_ALLOC *)buffer = cmd; + } + csr->setGSBAStateDirty(false); +} + +template +size_t CommandQueueHw::estimateStateBaseAddressCmdSize() { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; + using PIPE_CONTROL = 
typename GfxFamily::PIPE_CONTROL; + using _3DSTATE_BINDING_TABLE_POOL_ALLOC = typename GfxFamily::_3DSTATE_BINDING_TABLE_POOL_ALLOC; + if (NEO::ApiSpecificConfig::getBindlessConfiguration()) { + size_t size = sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL) + sizeof(_3DSTATE_BINDING_TABLE_POOL_ALLOC); + return size; + } else { + return 0; + } +} + +constexpr uint32_t maxPtssIndex = 15u; + +template +void CommandQueueHw::handleScratchSpace(NEO::HeapContainer &sshHeaps, + NEO::ScratchSpaceController *scratchController, + bool &gsbaState, bool &frontEndState, + uint32_t perThreadScratchSpaceSize) { + if (perThreadScratchSpaceSize > 0) { + if (sshHeaps.size() > 0) { + uint32_t offsetIndex = maxPtssIndex * csr->getOsContext().getEngineType() + 1u; + scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSize, 0u, csr->peekTaskCount(), + csr->getOsContext(), gsbaState, frontEndState); + } + if (NEO::ApiSpecificConfig::getBindlessConfiguration()) { + scratchController->programBindlessSurfaceStateForScratch(device->getNEODevice()->getBindlessHeapsHelper(), perThreadScratchSpaceSize, 0u, csr->peekTaskCount(), + csr->getOsContext(), gsbaState, frontEndState, csr); + } + auto scratchAllocation = scratchController->getScratchSpaceAllocation(); + csr->makeResident(*scratchAllocation); + } +} + +template +void CommandQueueHw::patchCommands(CommandList &commandList, uint64_t scratchAddress) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using CFE_STATE = typename GfxFamily::CFE_STATE; + + uint32_t lowScratchAddress = uint32_t(0xFFFFFFFF & scratchAddress); + + auto &commandsToPatch = commandList.getCommandsToPatch(); + for (auto &commandToPatch : commandsToPatch) { + switch (commandToPatch.type) { + case CommandList::CommandToPatch::FrontEndState: + reinterpret_cast(commandToPatch.pCommand)->setScratchSpaceBuffer(lowScratchAddress); + *reinterpret_cast(commandToPatch.pDestination) = *reinterpret_cast(commandToPatch.pCommand); + break; + 
default: + UNRECOVERABLE_IF(true); + } + } +} + +} // namespace L0 diff --git a/level_zero/core/source/xe_hp_core/CMakeLists.txt b/level_zero/core/source/xe_hp_core/CMakeLists.txt new file mode 100644 index 0000000000..372f47eb4e --- /dev/null +++ b/level_zero/core/source/xe_hp_core/CMakeLists.txt @@ -0,0 +1,21 @@ +# +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT +# + +if(SUPPORT_XE_HP_CORE) + set(HW_SOURCES_XE_HP_CORE + ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + ${CMAKE_CURRENT_SOURCE_DIR}/debugger_xe_hp_core.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/image_xe_hp_core.inl + ${CMAKE_CURRENT_SOURCE_DIR}/sampler_xe_hp_core.inl + ${CMAKE_CURRENT_SOURCE_DIR}/enable_family_full_l0_xe_hp_core.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/l0_hw_helper_xe_hp_core.cpp + ) + + add_subdirectories() + + target_sources(${L0_STATIC_LIB_NAME} PRIVATE ${HW_SOURCES_XE_HP_CORE}) + set_property(GLOBAL PROPERTY L0_HW_SOURCES_XE_HP_CORE ${HW_SOURCES_XE_HP_CORE}) +endif() diff --git a/level_zero/core/source/xe_hp_core/debugger_xe_hp_core.cpp b/level_zero/core/source/xe_hp_core/debugger_xe_hp_core.cpp new file mode 100644 index 0000000000..937613a10a --- /dev/null +++ b/level_zero/core/source/xe_hp_core/debugger_xe_hp_core.cpp @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "level_zero/core/source/debugger/debugger_l0.inl" + +namespace NEO { +struct XeHpFamily; +using GfxFamily = XeHpFamily; + +} // namespace NEO + +namespace L0 { +template class DebuggerL0Hw; +DebuggerL0PopulateFactory debuggerXE_HP_CORE; +} // namespace L0 diff --git a/level_zero/core/source/xe_hp_core/enable_family_full_l0_xe_hp_core.cpp b/level_zero/core/source/xe_hp_core/enable_family_full_l0_xe_hp_core.cpp new file mode 100644 index 0000000000..f0aa75046c --- /dev/null +++ b/level_zero/core/source/xe_hp_core/enable_family_full_l0_xe_hp_core.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * 
SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_stream/command_stream_receiver_hw.h" +#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h" +#include "shared/source/helpers/populate_factory.h" + +#include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" +#include "opencl/source/mem_obj/buffer.h" + +#include "level_zero/core/source/helpers/l0_populate_factory.h" +#include "level_zero/core/source/hw_helpers/l0_hw_helper.h" + +namespace NEO { + +typedef XeHpFamily Family; + +struct EnableL0XeHpCore { + EnableL0XeHpCore() { + populateFactoryTable>(); + populateFactoryTable>(); + populateFactoryTable>(); + populateFactoryTable>(); + L0::populateFactoryTable>(); + } +}; + +static EnableL0XeHpCore enable; +} // namespace NEO diff --git a/level_zero/core/source/xe_hp_core/image_xe_hp_core.inl b/level_zero/core/source/xe_hp_core/image_xe_hp_core.inl new file mode 100644 index 0000000000..e30d34b13c --- /dev/null +++ b/level_zero/core/source/xe_hp_core/image_xe_hp_core.inl @@ -0,0 +1,11 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/xe_hp_core/hw_cmds.h" +#include "shared/source/xe_hp_core/hw_info.h" + +#include "level_zero/core/source/image/image_hw.inl" diff --git a/level_zero/core/source/xe_hp_core/l0_hw_helper_xe_hp_core.cpp b/level_zero/core/source/xe_hp_core/l0_hw_helper_xe_hp_core.cpp new file mode 100644 index 0000000000..de3dbca066 --- /dev/null +++ b/level_zero/core/source/xe_hp_core/l0_hw_helper_xe_hp_core.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "level_zero/core/source/helpers/l0_populate_factory.h" +#include "level_zero/core/source/hw_helpers/l0_hw_helper_base.inl" +#include "level_zero/core/source/hw_helpers/l0_hw_helper_skl_plus.inl" + +#include "hw_cmds.h" + +namespace L0 { + +using Family = NEO::XeHpFamily; +static auto gfxCore = IGFX_XE_HP_CORE; 
+ +template <> +void populateFactoryTable>() { + extern L0HwHelper *l0HwHelperFactory[IGFX_MAX_CORE]; + l0HwHelperFactory[gfxCore] = &L0HwHelperHw::get(); +} + +template class L0HwHelperHw; + +} // namespace L0 diff --git a/level_zero/core/source/xe_hp_core/sampler_xe_hp_core.inl b/level_zero/core/source/xe_hp_core/sampler_xe_hp_core.inl new file mode 100644 index 0000000000..5bb51a698c --- /dev/null +++ b/level_zero/core/source/xe_hp_core/sampler_xe_hp_core.inl @@ -0,0 +1,11 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/xe_hp_core/hw_cmds.h" +#include "shared/source/xe_hp_core/hw_info.h" + +#include "level_zero/core/source/sampler/sampler_hw.inl" diff --git a/level_zero/core/source/xe_hp_core/xehp/CMakeLists.txt b/level_zero/core/source/xe_hp_core/xehp/CMakeLists.txt new file mode 100644 index 0000000000..48bd93378b --- /dev/null +++ b/level_zero/core/source/xe_hp_core/xehp/CMakeLists.txt @@ -0,0 +1,18 @@ +# +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT +# + +if(SUPPORT_XEHP) + set(HW_SOURCES_XE_HP_CORE + ${HW_SOURCES_XE_HP_CORE} + ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + ${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_xehp.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cmdqueue_xehp.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/kernel_xehp.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/image_xehp.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/sampler_xehp.cpp + PARENT_SCOPE + ) +endif() diff --git a/level_zero/core/source/xe_hp_core/xehp/cmdlist_xehp.cpp b/level_zero/core/source/xe_hp_core/xehp/cmdlist_xehp.cpp new file mode 100644 index 0000000000..721e6155a2 --- /dev/null +++ b/level_zero/core/source/xe_hp_core/xehp/cmdlist_xehp.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "level_zero/core/source/cmdlist/cmdlist_hw.inl" +#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl" +#include 
"level_zero/core/source/cmdlist/cmdlist_hw_xehp_plus.inl" + +#include "cmdlist_extended.inl" + +namespace L0 { + +template struct CommandListCoreFamily; + +template <> +struct CommandListProductFamily : public CommandListCoreFamily { + using CommandListCoreFamily::CommandListCoreFamily; +}; + +static CommandListPopulateFactory> + populateXEHP; + +template <> +struct CommandListImmediateProductFamily : public CommandListCoreFamilyImmediate { + using CommandListCoreFamilyImmediate::CommandListCoreFamilyImmediate; +}; + +static CommandListImmediatePopulateFactory> + populateXEHPImmediate; + +} // namespace L0 diff --git a/level_zero/core/source/xe_hp_core/xehp/cmdqueue_xehp.cpp b/level_zero/core/source/xe_hp_core/xehp/cmdqueue_xehp.cpp new file mode 100644 index 0000000000..d3bea7cbc5 --- /dev/null +++ b/level_zero/core/source/xe_hp_core/xehp/cmdqueue_xehp.cpp @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/xe_hp_core/hw_cmds.h" +#include "shared/source/xe_hp_core/hw_info.h" + +#include "level_zero/core/source/cmdqueue/cmdqueue_hw.inl" +#include "level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_plus.inl" + +#include "cmdqueue_extended.inl" +namespace L0 { +template struct CommandQueueHw; +static CommandQueuePopulateFactory> + populateXEHP; + +} // namespace L0 diff --git a/level_zero/core/source/xe_hp_core/xehp/image_xehp.cpp b/level_zero/core/source/xe_hp_core/xehp/image_xehp.cpp new file mode 100644 index 0000000000..0aa5a867c7 --- /dev/null +++ b/level_zero/core/source/xe_hp_core/xehp/image_xehp.cpp @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "level_zero/core/source/xe_hp_core/image_xe_hp_core.inl" + +namespace L0 { + +template <> +struct ImageProductFamily : public ImageCoreFamily { + using ImageCoreFamily::ImageCoreFamily; + + ze_result_t initialize(Device *device, const ze_image_desc_t *desc) 
override { + return ImageCoreFamily::initialize(device, desc); + }; +}; + +static ImagePopulateFactory> populateXEHP; + +} // namespace L0 diff --git a/level_zero/core/source/xe_hp_core/xehp/kernel_xehp.cpp b/level_zero/core/source/xe_hp_core/xehp/kernel_xehp.cpp new file mode 100644 index 0000000000..c179d2d8d2 --- /dev/null +++ b/level_zero/core/source/xe_hp_core/xehp/kernel_xehp.cpp @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "level_zero/core/source/kernel/kernel_hw.h" + +namespace L0 { + +static KernelPopulateFactory> populateXEHP; + +} // namespace L0 diff --git a/level_zero/core/source/xe_hp_core/xehp/sampler_xehp.cpp b/level_zero/core/source/xe_hp_core/xehp/sampler_xehp.cpp new file mode 100644 index 0000000000..0e861c16c9 --- /dev/null +++ b/level_zero/core/source/xe_hp_core/xehp/sampler_xehp.cpp @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "level_zero/core/source/xe_hp_core/sampler_xe_hp_core.inl" + +namespace L0 { + +template <> +struct SamplerProductFamily : public SamplerCoreFamily { + using SamplerCoreFamily::SamplerCoreFamily; +}; + +static SamplerPopulateFactory> populateXEHP; + +} // namespace L0 diff --git a/opencl/extensions/public/cl_ext_private.h b/opencl/extensions/public/cl_ext_private.h index c370af790a..a52b623194 100644 --- a/opencl/extensions/public/cl_ext_private.h +++ b/opencl/extensions/public/cl_ext_private.h @@ -211,6 +211,27 @@ typedef cl_bitfield cl_device_feature_capabilities_intel; /* For GPU devices, version 1.0.0: */ #define CL_DEVICE_FEATURE_FLAG_DP4A_INTEL (1 << 0) +////// RESOURCE BARRIER EXT +#define CL_COMMAND_RESOURCE_BARRIER 0x10010 + +typedef cl_uint cl_resource_barrier_type; +#define CL_RESOURCE_BARRIER_TYPE_ACQUIRE 0x1 // FLUSH+EVICT +#define CL_RESOURCE_BARRIER_TYPE_RELEASE 0x2 // FLUSH +#define CL_RESOURCE_BARRIER_TYPE_DISCARD 0x3 // DISCARD + +typedef cl_uint 
cl_resource_memory_scope; +#define CL_MEMORY_SCOPE_DEVICE 0x0 // INCLUDES CROSS-TILE +#define CL_MEMORY_SCOPE_ALL_SVM_DEVICES 0x1 // CL_MEMORY_SCOPE_DEVICE + CROSS-DEVICE + +#pragma pack(push, 1) +typedef struct _cl_resource_barrier_descriptor_intel { + void *svm_allocation_pointer; + cl_mem mem_object; + cl_resource_barrier_type type; + cl_resource_memory_scope scope; +} cl_resource_barrier_descriptor_intel; +#pragma pack(pop) + /**************************************** * cl_khr_pci_bus_info extension * ***************************************/ diff --git a/opencl/source/command_queue/CMakeLists.txt b/opencl/source/command_queue/CMakeLists.txt index aa49e02a58..599c6df22c 100644 --- a/opencl/source/command_queue/CMakeLists.txt +++ b/opencl/source/command_queue/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2018-2020 Intel Corporation +# Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -27,7 +27,7 @@ set(RUNTIME_SRCS_COMMAND_QUEUE ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_rect.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_image.h - ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/enqueue_resource_barrier.h + ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_resource_barrier.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_svm.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_write_buffer_rect.h @@ -41,8 +41,18 @@ set(RUNTIME_SRCS_COMMAND_QUEUE ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_bdw_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}/local_work_size.cpp - ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/resource_barrier.h + ${CMAKE_CURRENT_SOURCE_DIR}/resource_barrier.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/resource_barrier.h ) + +if(SUPPORT_XEHP_PLUS) + list(APPEND RUNTIME_SRCS_COMMAND_QUEUE + ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_xehp_plus.inl + ${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_xehp_plus.inl + 
${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_xehp_plus.inl + ) +endif() + target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_COMMAND_QUEUE}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_COMMAND_QUEUE ${RUNTIME_SRCS_COMMAND_QUEUE}) add_subdirectories() diff --git a/opencl/source/command_queue/command_queue_hw_xehp_plus.inl b/opencl/source/command_queue/command_queue_hw_xehp_plus.inl new file mode 100644 index 0000000000..4188253f2f --- /dev/null +++ b/opencl/source/command_queue/command_queue_hw_xehp_plus.inl @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/cache_flush_xehp_plus.inl" + +#include "opencl/extensions/public/cl_ext_private.h" +#include "opencl/source/command_queue/command_queue_hw_base.inl" +#include "opencl/source/memory_manager/resource_surface.h" + +namespace NEO { + +template +void CommandQueueHw::runSchedulerSimulation(DeviceQueueHw &devQueueHw, Kernel &parentKernel) { +} + +template <> +void CommandQueueHw::submitCacheFlush(Surface **surfaces, + size_t numSurfaces, + LinearStream *commandStream, + uint64_t postSyncAddress) { + if constexpr (Family::isUsingL3Control) { + StackVec subranges; + for (auto surface : CreateRange(surfaces, numSurfaces)) { + auto resource = reinterpret_cast(surface); + auto alloc = resource->getGraphicsAllocation(); + coverRangeExact(alloc->getGpuAddress(), alloc->getUnderlyingBufferSize(), subranges, resource->resourceType); + } + + for (size_t subrangeNumber = 0; subrangeNumber < subranges.size(); subrangeNumber += maxFlushSubrangeCount) { + size_t rangeCount = subranges.size() <= subrangeNumber + maxFlushSubrangeCount ? 
subranges.size() - subrangeNumber : maxFlushSubrangeCount; + Range range = CreateRange(subranges.begin() + subrangeNumber, rangeCount); + uint64_t postSyncAddressToFlush = 0; + if (rangeCount < maxFlushSubrangeCount || subranges.size() - subrangeNumber - maxFlushSubrangeCount == 0) { + postSyncAddressToFlush = postSyncAddress; + } + + flushGpuCache(commandStream, range, postSyncAddressToFlush, device->getHardwareInfo()); + } + } +} + +template <> +bool CommandQueueHw::isCacheFlushCommand(uint32_t commandType) const { + return commandType == CL_COMMAND_RESOURCE_BARRIER; +} + +template <> +LinearStream &getCommandStream(CommandQueue &commandQueue, const CsrDependencies &csrDeps, bool reserveProfilingCmdsSpace, bool reservePerfCounterCmdsSpace, bool blitEnqueue, const MultiDispatchInfo &multiDispatchInfo, Surface **surfaces, size_t numSurfaces, bool isMarkerWithProfiling) { + size_t expectedSizeCS = 0; + bool usePostSync = false; + if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { + expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize(csrDeps); + usePostSync = true; + } + + if constexpr (Family::isUsingL3Control) { + StackVec subranges; + for (auto surface : CreateRange(surfaces, numSurfaces)) { + ResourceSurface *resource = reinterpret_cast(surface); + auto alloc = resource->getGraphicsAllocation(); + coverRangeExact(alloc->getGpuAddress(), alloc->getUnderlyingBufferSize(), subranges, resource->resourceType); + } + expectedSizeCS += getSizeNeededToFlushGpuCache(subranges, usePostSync); + } + + return commandQueue.getCS(expectedSizeCS); +} + +} // namespace NEO diff --git a/opencl/source/command_queue/enqueue_resource_barrier.h b/opencl/source/command_queue/enqueue_resource_barrier.h index 1b6edb6c5f..33173c9c0a 100644 --- a/opencl/source/command_queue/enqueue_resource_barrier.h +++ b/opencl/source/command_queue/enqueue_resource_barrier.h @@ -1,12 +1,21 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright 
(C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once +#include "shared/source/command_stream/command_stream_receiver.h" +#include "shared/source/device/device.h" + #include "opencl/source/command_queue/command_queue_hw.h" +#include "opencl/source/event/event.h" +#include "opencl/source/memory_manager/resource_surface.h" + +#include "resource_barrier.h" + +#include namespace NEO { @@ -15,6 +24,14 @@ cl_int CommandQueueHw::enqueueResourceBarrier(BarrierCommand *resourc cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) { + MultiDispatchInfo multiDispatch; + enqueueHandler(resourceBarrier->surfacePtrs.begin(), + resourceBarrier->numSurfaces, + false, + multiDispatch, + numEventsInWaitList, + eventWaitList, + event); return CL_SUCCESS; } -} // namespace NEO \ No newline at end of file +} // namespace NEO diff --git a/opencl/source/command_queue/gpgpu_walker_xehp_plus.inl b/opencl/source/command_queue/gpgpu_walker_xehp_plus.inl new file mode 100644 index 0000000000..361b82860c --- /dev/null +++ b/opencl/source/command_queue/gpgpu_walker_xehp_plus.inl @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/command_container/implicit_scaling.h" +#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/execution_environment/root_device_environment.h" +#include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/source/helpers/cache_flush_xehp_plus.inl" +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/helpers/l3_range.h" +#include "shared/source/helpers/simd_helper.h" + +#include "opencl/source/command_queue/gpgpu_walker_base.inl" +#include "opencl/source/platform/platform.h" + +namespace NEO { + +template +size_t GpgpuWalkerHelper::setGpgpuWalkerThreadData( + WALKER_TYPE *walkerCmd, + const KernelDescriptor &kernelDescriptor, + const size_t globalOffsets[3], + const 
size_t startWorkGroups[3], + const size_t numWorkGroups[3], + const size_t localWorkSizesIn[3], + uint32_t simd, + uint32_t workDim, + bool localIdsGenerationByRuntime, + bool inlineDataProgrammingRequired, + uint32_t requiredWorkGroupOrder) { + + bool kernelUsesLocalIds = kernelDescriptor.kernelAttributes.numLocalIdChannels > 0; + + auto localWorkSize = localWorkSizesIn[0] * localWorkSizesIn[1] * localWorkSizesIn[2]; + + walkerCmd->setThreadGroupIdXDimension(static_cast(numWorkGroups[0])); + walkerCmd->setThreadGroupIdYDimension(static_cast(numWorkGroups[1])); + walkerCmd->setThreadGroupIdZDimension(static_cast(numWorkGroups[2])); + + // compute executionMask - to tell which SIMD lines are active within thread + auto remainderSimdLanes = localWorkSize & (simd - 1); + uint64_t executionMask = maxNBitValue(remainderSimdLanes); + if (!executionMask) { + executionMask = maxNBitValue((simd == 1) ? 32 : simd); + } + + walkerCmd->setExecutionMask(static_cast(executionMask)); + walkerCmd->setSimdSize(getSimdConfig>(simd)); + walkerCmd->setMessageSimd(walkerCmd->getSimdSize()); + + walkerCmd->setThreadGroupIdStartingX(static_cast(startWorkGroups[0])); + walkerCmd->setThreadGroupIdStartingY(static_cast(startWorkGroups[1])); + walkerCmd->setThreadGroupIdStartingZ(static_cast(startWorkGroups[2])); + + //1) cross-thread inline data will be put into R1, but if kernel uses local ids, then cross-thread should be put further back + //so whenever local ids are driver or hw generated, reserve space by setting right values for emitLocalIds + //2) Auto-generation of local ids should be possible, when in fact local ids are used + if (!localIdsGenerationByRuntime && kernelUsesLocalIds) { + uint32_t emitLocalIdsForDim = 0; + if (kernelDescriptor.kernelAttributes.localId[0]) { + emitLocalIdsForDim |= (1 << 0); + } + if (kernelDescriptor.kernelAttributes.localId[1]) { + emitLocalIdsForDim |= (1 << 1); + } + if (kernelDescriptor.kernelAttributes.localId[2]) { + emitLocalIdsForDim |= (1 << 
2); + } + walkerCmd->setEmitLocalId(emitLocalIdsForDim); + } + if (inlineDataProgrammingRequired == true) { + walkerCmd->setEmitInlineParameter(1); + } + + if ((!localIdsGenerationByRuntime) && kernelUsesLocalIds) { + walkerCmd->setLocalXMaximum(static_cast(localWorkSizesIn[0] - 1)); + walkerCmd->setLocalYMaximum(static_cast(localWorkSizesIn[1] - 1)); + walkerCmd->setLocalZMaximum(static_cast(localWorkSizesIn[2] - 1)); + + walkerCmd->setGenerateLocalId(1); + walkerCmd->setWalkOrder(requiredWorkGroupOrder); + } + + return localWorkSize; +} + +template +void GpgpuWalkerHelper::dispatchScheduler( + LinearStream &commandStream, + DeviceQueueHw &devQueueHw, + PreemptionMode preemptionMode, + SchedulerKernel &scheduler, + IndirectHeap *ssh, + IndirectHeap *dsh, + bool isCcsUsed) { + UNRECOVERABLE_IF(true); +} + +template +void GpgpuWalkerHelper::setupTimestampPacket(LinearStream *cmdStream, + WALKER_TYPE *walkerCmd, + TagNodeBase *timestampPacketNode, + const RootDeviceEnvironment &rootDeviceEnvironment) { + using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; + + auto &postSyncData = walkerCmd->getPostSync(); + postSyncData.setDataportPipelineFlush(true); + + auto gmmHelper = rootDeviceEnvironment.getGmmHelper(); + postSyncData.setMocs(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED)); + + EncodeDispatchKernel::adjustTimestampPacket(*walkerCmd, *rootDeviceEnvironment.getHardwareInfo()); + + if (DebugManager.flags.OverridePostSyncMocs.get() != -1) { + postSyncData.setMocs(DebugManager.flags.OverridePostSyncMocs.get()); + } + + if (DebugManager.flags.UseImmDataWriteModeOnPostSyncOperation.get()) { + postSyncData.setOperation(GfxFamily::POSTSYNC_DATA::OPERATION::OPERATION_WRITE_IMMEDIATE_DATA); + auto contextEndAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNode); + postSyncData.setDestinationAddress(contextEndAddress); + postSyncData.setImmediateData(0x2'0000'0002); + } else { + 
postSyncData.setOperation(GfxFamily::POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP); + auto contextStartAddress = TimestampPacketHelper::getContextStartGpuAddress(*timestampPacketNode); + postSyncData.setDestinationAddress(contextStartAddress); + } + if (DebugManager.flags.OverrideSystolicInComputeWalker.get() != -1) { + walkerCmd->setSystolicModeEnable((DebugManager.flags.OverrideSystolicInComputeWalker.get())); + } +} + +template +void GpgpuWalkerHelper::adjustMiStoreRegMemMode(MI_STORE_REG_MEM *storeCmd) { + storeCmd->setMmioRemapEnable(true); +} + +template +size_t EnqueueOperation::getSizeRequiredCSKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo) { + size_t numPipeControls = MemorySynchronizationCommands::isPipeControlWArequired(commandQueue.getDevice().getHardwareInfo()) ? 2 : 1; + + size_t size = sizeof(typename GfxFamily::COMPUTE_WALKER) + + (sizeof(typename GfxFamily::PIPE_CONTROL) * numPipeControls) + + HardwareCommandsHelper::getSizeRequiredCS() + + EncodeMemoryPrefetch::getSizeForMemoryPrefetch(pKernel->getKernelInfo().heapInfo.KernelHeapSize); + auto devices = commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getDeviceBitfield(); + auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, + !pKernel->isSingleSubdevicePreferred()); + if (partitionWalker) { + Vec3 groupStart = dispatchInfo.getStartOfWorkgroups(); + Vec3 groupCount = dispatchInfo.getNumberOfWorkgroups(); + UNRECOVERABLE_IF(groupCount.x == 0); + const bool staticPartitioning = commandQueue.getGpgpuCommandStreamReceiver().isStaticWorkPartitioningEnabled(); + size += static_cast(ImplicitScalingDispatch::getSize(false, staticPartitioning, devices, groupStart, groupCount)); + } + + size += PerformanceCounters::getGpuCommandsSize(commandQueue, reservePerfCounters); + + return size; +} + +template +size_t 
EnqueueOperation::getSizeRequiredForTimestampPacketWrite() { + return 0; +} + +template +void GpgpuWalkerHelper::dispatchProfilingCommandsStart(TagNodeBase &hwTimeStamps, LinearStream *commandStream, const HardwareInfo &hwInfo) { +} + +template +void GpgpuWalkerHelper::dispatchProfilingCommandsEnd(TagNodeBase &hwTimeStamps, LinearStream *commandStream, const HardwareInfo &hwInfo) { +} + +template +size_t EnqueueOperation::getSizeForCacheFlushAfterWalkerCommands(const Kernel &kernel, const CommandQueue &commandQueue) { + size_t size = 0; + + if (kernel.requiresCacheFlushCommand(commandQueue)) { + size += sizeof(typename GfxFamily::PIPE_CONTROL); + + if constexpr (GfxFamily::isUsingL3Control) { + StackVec allocationsForCacheFlush; + kernel.getAllocationsForCacheFlush(allocationsForCacheFlush); + + StackVec subranges; + for (auto &allocation : allocationsForCacheFlush) { + coverRangeExact(allocation->getGpuAddress(), allocation->getUnderlyingBufferSize(), subranges, GfxFamily::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); + } + + size += getSizeNeededToFlushGpuCache(subranges, true); + } + } + + return size; +} + +} // namespace NEO diff --git a/opencl/source/command_queue/hardware_interface_xehp_plus.inl b/opencl/source/command_queue/hardware_interface_xehp_plus.inl new file mode 100644 index 0000000000..87a253b047 --- /dev/null +++ b/opencl/source/command_queue/hardware_interface_xehp_plus.inl @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/command_container/command_encoder.h" +#include "shared/source/command_container/implicit_scaling.h" +#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/helpers/engine_node_helper.h" +#include "shared/source/os_interface/os_context.h" +#include "shared/source/os_interface/os_interface.h" +#include "shared/source/utilities/tag_allocator.h" + +#include 
"opencl/source/command_queue/hardware_interface_base.inl" + +namespace NEO { + +template +inline void HardwareInterface::getDefaultDshSpace( + const size_t &offsetInterfaceDescriptorTable, + CommandQueue &commandQueue, + const MultiDispatchInfo &multiDispatchInfo, + size_t &totalInterfaceDescriptorTableSize, + Kernel *parentKernel, + IndirectHeap *dsh, + LinearStream *commandStream) { + + if (parentKernel) { + dsh->getSpace(commandQueue.getContext().getDefaultDeviceQueue()->getDshOffset() - dsh->getUsed()); + } +} + +template +inline void HardwareInterface::dispatchWorkarounds( + LinearStream *commandStream, + CommandQueue &commandQueue, + Kernel &kernel, + const bool &enable) { +} + +template +inline void HardwareInterface::programWalker( + LinearStream &commandStream, + Kernel &kernel, + CommandQueue &commandQueue, + TimestampPacketContainer *currentTimestampPacketNodes, + IndirectHeap &dsh, + IndirectHeap &ioh, + IndirectHeap &ssh, + size_t globalWorkSizes[3], + size_t localWorkSizes[3], + PreemptionMode preemptionMode, + size_t currentDispatchIndex, + uint32_t &interfaceDescriptorIndex, + const DispatchInfo &dispatchInfo, + size_t offsetInterfaceDescriptorTable, + Vec3 &numberOfWorkgroups, + Vec3 &startOfWorkgroups) { + + using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; + + COMPUTE_WALKER walkerCmd = GfxFamily::cmdInitGpgpuWalker; + auto &kernelInfo = kernel.getKernelInfo(); + + uint32_t dim = dispatchInfo.getDim(); + uint32_t simd = kernelInfo.getMaxSimdSize(); + + auto numChannels = kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels; + + size_t globalOffsets[3] = {dispatchInfo.getOffset().x, dispatchInfo.getOffset().y, dispatchInfo.getOffset().z}; + size_t startWorkGroups[3] = {startOfWorkgroups.x, startOfWorkgroups.y, startOfWorkgroups.z}; + size_t numWorkGroups[3] = {numberOfWorkgroups.x, numberOfWorkgroups.y, numberOfWorkgroups.z}; + uint32_t requiredWalkOrder = 0u; + + bool localIdsGenerationByRuntime = 
EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired( + numChannels, + localWorkSizes, + std::array{{kernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[0], + kernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[1], + kernelInfo.kernelDescriptor.kernelAttributes.workgroupWalkOrder[2]}}, + kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresWorkgroupWalkOrder, + requiredWalkOrder, + simd); + + bool inlineDataProgrammingRequired = HardwareCommandsHelper::inlineDataProgrammingRequired(kernel); + auto idd = &walkerCmd.getInterfaceDescriptor(); + + if (currentTimestampPacketNodes && commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { + auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex); + GpgpuWalkerHelper::setupTimestampPacket(&commandStream, &walkerCmd, timestampPacket, commandQueue.getDevice().getRootDeviceEnvironment()); + } + + auto isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()); + auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(kernel); + + if (auto kernelAllocation = kernelInfo.getGraphicsAllocation()) { + EncodeMemoryPrefetch::programMemoryPrefetch(commandStream, *kernelAllocation, kernelInfo.heapInfo.KernelHeapSize, 0, commandQueue.getDevice().getHardwareInfo()); + } + + HardwareCommandsHelper::sendIndirectState( + commandStream, + dsh, + ioh, + ssh, + kernel, + kernel.getKernelStartOffset(localIdsGenerationByRuntime, kernelUsesLocalIds, isCcsUsed), + simd, + localWorkSizes, + offsetInterfaceDescriptorTable, + interfaceDescriptorIndex, + preemptionMode, + &walkerCmd, + idd, + localIdsGenerationByRuntime, + commandQueue.getDevice()); + + GpgpuWalkerHelper::setGpgpuWalkerThreadData(&walkerCmd, kernelInfo.kernelDescriptor, globalOffsets, startWorkGroups, + numWorkGroups, localWorkSizes, simd, dim, + localIdsGenerationByRuntime, inlineDataProgrammingRequired, requiredWalkOrder); + + 
EncodeDispatchKernel::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd); + + auto devices = commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getDeviceBitfield(); + auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, !kernel.isSingleSubdevicePreferred()); + + if (partitionWalker) { + const uint64_t workPartitionAllocationGpuVa = commandQueue.getDevice().getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress(); + uint32_t partitionCount = 0u; + ImplicitScalingDispatch::dispatchCommands(commandStream, + walkerCmd, + devices, + partitionCount, + false, + false, + kernel.usesImages(), + workPartitionAllocationGpuVa); + + auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex); + timestampPacket->setPacketsUsed(partitionCount); + } else { + auto computeWalkerOnStream = reinterpret_cast(commandStream.getSpace(sizeof(typename GfxFamily::COMPUTE_WALKER))); + *computeWalkerOnStream = walkerCmd; + } +} +} // namespace NEO diff --git a/opencl/source/command_queue/resource_barrier.cpp b/opencl/source/command_queue/resource_barrier.cpp new file mode 100644 index 0000000000..2189e25e87 --- /dev/null +++ b/opencl/source/command_queue/resource_barrier.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "opencl/source/command_queue/resource_barrier.h" + +#include "shared/source/memory_manager/unified_memory_manager.h" +#include "shared/source/utilities/range.h" + +#include "opencl/source/command_queue/command_queue.h" +#include "opencl/source/context/context.h" +#include "opencl/source/helpers/validators.h" + +namespace NEO { +BarrierCommand::BarrierCommand(CommandQueue *commandQueue, const cl_resource_barrier_descriptor_intel *descriptors, uint32_t numDescriptors) : numSurfaces(numDescriptors) { + for (auto description : CreateRange(descriptors, numDescriptors)) { + 
GraphicsAllocation *allocation; + if (description.mem_object) { + MemObj *memObj = nullptr; + WithCastToInternal(description.mem_object, &memObj); + allocation = memObj->getGraphicsAllocation(commandQueue->getDevice().getRootDeviceIndex()); + } else { + auto svmData = commandQueue->getContext().getSVMAllocsManager()->getSVMAlloc(description.svm_allocation_pointer); + UNRECOVERABLE_IF(svmData == nullptr); + allocation = svmData->gpuAllocations.getGraphicsAllocation(commandQueue->getDevice().getRootDeviceIndex()); + } + surfaces.push_back(ResourceSurface(allocation, description.type, description.scope)); + } + for (auto it = surfaces.begin(), end = surfaces.end(); it != end; it++) { + surfacePtrs.push_back(it); + } +} +} // namespace NEO diff --git a/opencl/source/command_queue/resource_barrier.h b/opencl/source/command_queue/resource_barrier.h index 0850b1208f..09a9c29e3d 100644 --- a/opencl/source/command_queue/resource_barrier.h +++ b/opencl/source/command_queue/resource_barrier.h @@ -1,10 +1,22 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ +#pragma once +#include "opencl/extensions/public/cl_ext_private.h" +#include "opencl/source/memory_manager/resource_surface.h" + namespace NEO { -struct BarrierCommand {}; -} // namespace NEO \ No newline at end of file +class CommandQueue; +class BarrierCommand { + public: + BarrierCommand(CommandQueue *commandQueue, const cl_resource_barrier_descriptor_intel *descriptors, uint32_t numDescriptors); + ~BarrierCommand() {} + uint32_t numSurfaces = 0; + StackVec surfaces; + StackVec surfacePtrs; +}; +} // namespace NEO diff --git a/opencl/source/command_stream/CMakeLists.txt b/opencl/source/command_stream/CMakeLists.txt index 3a898c6427..ed1fb0603b 100644 --- a/opencl/source/command_stream/CMakeLists.txt +++ b/opencl/source/command_stream/CMakeLists.txt @@ -22,6 +22,14 @@ set(RUNTIME_SRCS_COMMAND_STREAM 
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/command_stream_receiver_simulated_hw.h ) +if(SUPPORT_XEHP_PLUS) + list(APPEND RUNTIME_SRCS_COMMAND_STREAM + ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_hw_xehp_plus.inl + ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_simulated_common_hw_xehp_plus.inl + ${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_xehp_plus.inl + ) +endif() + target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_COMMAND_STREAM}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_COMMAND_STREAM ${RUNTIME_SRCS_COMMAND_STREAM}) add_subdirectories() diff --git a/opencl/source/command_stream/aub_command_stream_receiver_hw_xehp_plus.inl b/opencl/source/command_stream/aub_command_stream_receiver_hw_xehp_plus.inl new file mode 100644 index 0000000000..89e883d692 --- /dev/null +++ b/opencl/source/command_stream/aub_command_stream_receiver_hw_xehp_plus.inl @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/aub_mem_dump/page_table_entry_bits.h" +#include "shared/source/helpers/engine_node_helper.h" + +#include "opencl/source/command_stream/aub_command_stream_receiver_hw_base.inl" + +namespace NEO { + +template +constexpr uint32_t AUBCommandStreamReceiverHw::getMaskAndValueForPollForCompletion() { + return 0x00008000; +} + +template +void AUBCommandStreamReceiverHw::addContextToken(uint32_t dumpHandle) { + AUB::createContext(*stream, dumpHandle); +} + +template +uint32_t AUBCommandStreamReceiverHw::getGUCWorkQueueItemHeader() { + if (EngineHelpers::isCcs(osContext->getEngineType())) { + return 0x00030401; + } + return 0x00030001; +} + +template +int AUBCommandStreamReceiverHw::getAddressSpaceFromPTEBits(uint64_t entryBits) const { + if (entryBits & BIT(PageTableEntry::localMemoryBit)) { + return AubMemDump::AddressSpaceValues::TraceLocal; + } + return AubMemDump::AddressSpaceValues::TraceNonlocal; +} + +} // namespace NEO diff --git 
a/opencl/source/command_stream/command_stream_receiver_simulated_common_hw_xehp_plus.inl b/opencl/source/command_stream/command_stream_receiver_simulated_common_hw_xehp_plus.inl new file mode 100644 index 0000000000..037bb6a2f2 --- /dev/null +++ b/opencl/source/command_stream/command_stream_receiver_simulated_common_hw_xehp_plus.inl @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/aub_mem_dump/page_table_entry_bits.h" +#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/memory_manager/memory_banks.h" +#include "shared/source/memory_manager/memory_pool.h" +#include "shared/source/memory_manager/physical_address_allocator.h" + +#include "opencl/source/command_stream/command_stream_receiver_simulated_common_hw_base.inl" + +namespace NEO { + +template +void CommandStreamReceiverSimulatedCommonHw::initGlobalMMIO() { + for (auto &mmioPair : AUBFamilyMapper::globalMMIO) { + stream->writeMMIO(mmioPair.first, mmioPair.second); + } + + if (this->localMemoryEnabled) { + MMIOPair guCntl = {0x00101010, 0x00000080}; //GU_CNTL + stream->writeMMIO(guCntl.first, guCntl.second); + + MMIOPair lmemCfg = {0x0000cf58, 0x80000000}; //LMEM_CFG + stream->writeMMIO(lmemCfg.first, lmemCfg.second); + + MMIOPair tileAddrRange[] = {{0x00004900, 0x0001}, + {0x00004904, 0x0001}, + {0x00004908, 0x0001}, + {0x0000490c, 0x0001}}; //XEHP_TILE_ADDR_RANGE + + const uint32_t numberOfTiles = 4; + const uint32_t localMemorySizeGB = static_cast(AubHelper::getMemBankSize(&this->peekHwInfo()) / MemoryConstants::gigaByte); + + uint32_t localMemoryBaseAddressInGB = 0x0; + + for (uint32_t i = 0; i < numberOfTiles; i++) { + tileAddrRange[i].second |= localMemoryBaseAddressInGB << 1; + tileAddrRange[i].second |= localMemorySizeGB << 8; + stream->writeMMIO(tileAddrRange[i].first, tileAddrRange[i].second); + + localMemoryBaseAddressInGB += 
localMemorySizeGB; + } + } +} + +template +uint64_t CommandStreamReceiverSimulatedCommonHw::getPPGTTAdditionalBits(GraphicsAllocation *gfxAllocation) { + if (DebugManager.flags.AUBDumpForceAllToLocalMemory.get() || + (gfxAllocation && gfxAllocation->getMemoryPool() == MemoryPool::LocalMemory)) { + return BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit) | BIT(PageTableEntry::localMemoryBit); + } + return BIT(PageTableEntry::presentBit) | BIT(PageTableEntry::writableBit); +} + +template +void CommandStreamReceiverSimulatedCommonHw::getGTTData(void *memory, AubGTTData &data) { + data.present = true; + data.localMemory = this->localMemoryEnabled; +} + +template +uint32_t CommandStreamReceiverSimulatedCommonHw::getMemoryBankForGtt() const { + auto deviceIndex = getDeviceIndex(); + if (this->localMemoryEnabled) { + return MemoryBanks::getBankForLocalMemory(deviceIndex); + } + return MemoryBanks::getBank(deviceIndex); +} + +template +const AubMemDump::LrcaHelper &CommandStreamReceiverSimulatedCommonHw::getCsTraits(aub_stream::EngineType engineType) { + return *AUBFamilyMapper::csTraits[engineType]; +} + +template +void CommandStreamReceiverSimulatedCommonHw::initEngineMMIO() { + auto mmioList = AUBFamilyMapper::perEngineMMIO[osContext->getEngineType()]; + DEBUG_BREAK_IF(!mmioList); + for (auto &mmioPair : *mmioList) { + stream->writeMMIO(mmioPair.first, mmioPair.second); + } +} + +template +void CommandStreamReceiverSimulatedCommonHw::submitLRCA(const MiContextDescriptorReg &contextDescriptor) { + auto mmioBase = getCsTraits(osContext->getEngineType()).mmioBase; + stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2510), contextDescriptor.ulData[0]); + stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2514), contextDescriptor.ulData[1]); + + // Load our new exec list + stream->writeMMIO(AubMemDump::computeRegisterOffset(mmioBase, 0x2550), 1); +} + +} // namespace NEO diff --git 
a/opencl/source/command_stream/tbx_command_stream_receiver_xehp_plus.inl b/opencl/source/command_stream/tbx_command_stream_receiver_xehp_plus.inl new file mode 100644 index 0000000000..a74b20c47c --- /dev/null +++ b/opencl/source/command_stream/tbx_command_stream_receiver_xehp_plus.inl @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +template <> +uint32_t TbxCommandStreamReceiverHw::getMaskAndValueForPollForCompletion() const { + return 0x80; +} + +template <> +bool TbxCommandStreamReceiverHw::getpollNotEqualValueForPollForCompletion() const { + return true; +} diff --git a/opencl/source/gtpin/CMakeLists.txt b/opencl/source/gtpin/CMakeLists.txt index 820c86c5f8..c40c332a85 100644 --- a/opencl/source/gtpin/CMakeLists.txt +++ b/opencl/source/gtpin/CMakeLists.txt @@ -20,6 +20,12 @@ if(NOT DISABLED_GTPIN_SUPPORT) ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_defs.h ) + if(SUPPORT_XEHP_PLUS) + list(APPEND RUNTIME_SRCS_GTPIN + ${CMAKE_CURRENT_SOURCE_DIR}/gtpin_hw_helper_xehp_plus.inl + ) + endif() + if(WIN32) set(MSVC_DEF_ADDITIONAL_EXPORTS "${MSVC_DEF_ADDITIONAL_EXPORTS}\nGTPin_Init") endif() diff --git a/opencl/source/gtpin/gtpin_hw_helper_xehp_plus.inl b/opencl/source/gtpin/gtpin_hw_helper_xehp_plus.inl new file mode 100644 index 0000000000..b2512dab95 --- /dev/null +++ b/opencl/source/gtpin/gtpin_hw_helper_xehp_plus.inl @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/debug_settings/debug_settings_manager.h" + +#include "opencl/source/gtpin/gtpin_hw_helper.h" + +namespace NEO { + +template +bool GTPinHwHelperHw::canUseSharedAllocation(const HardwareInfo &hwInfo) const { + bool canUseSharedAllocation = true; + if (DebugManager.flags.GTPinAllocateBufferInSharedMemory.get() != -1) { + canUseSharedAllocation = !!DebugManager.flags.GTPinAllocateBufferInSharedMemory.get(); + } + return canUseSharedAllocation; +} + +} // namespace NEO diff 
--git a/opencl/source/helpers/CMakeLists.txt b/opencl/source/helpers/CMakeLists.txt index ed3252521c..51e97fd068 100644 --- a/opencl/source/helpers/CMakeLists.txt +++ b/opencl/source/helpers/CMakeLists.txt @@ -54,6 +54,13 @@ set(RUNTIME_SRCS_HELPERS_BASE ${CMAKE_CURRENT_SOURCE_DIR}/validators.h ) +if(SUPPORT_XEHP_PLUS) + list(APPEND RUNTIME_SRCS_HELPERS_BASE + ${CMAKE_CURRENT_SOURCE_DIR}/cl_hw_helper_xehp_plus.inl + ${CMAKE_CURRENT_SOURCE_DIR}/hardware_commands_helper_xehp_plus.inl + ) +endif() + set(RUNTIME_SRCS_HELPERS_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks_tgllp_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}/windows/kmd_notify_properties_windows.cpp diff --git a/opencl/source/helpers/cl_device_helpers.cpp b/opencl/source/helpers/cl_device_helpers.cpp index 23d7730380..53218bba6c 100644 --- a/opencl/source/helpers/cl_device_helpers.cpp +++ b/opencl/source/helpers/cl_device_helpers.cpp @@ -9,4 +9,5 @@ namespace NEO { void ClDeviceHelper::getExtraDeviceInfo(const ClDevice &clDevice, cl_device_info paramName, ClDeviceInfoParam ¶m, const void *&src, size_t &size, size_t &retSize) {} +cl_device_feature_capabilities_intel ClDeviceHelper::getExtraCapabilities() { return 0; } } // namespace NEO diff --git a/opencl/source/helpers/cl_device_helpers.h b/opencl/source/helpers/cl_device_helpers.h index 321a0c137e..e6978cf139 100644 --- a/opencl/source/helpers/cl_device_helpers.h +++ b/opencl/source/helpers/cl_device_helpers.h @@ -6,6 +6,8 @@ */ #pragma once +#include "opencl/extensions/public/cl_ext_private.h" + #include "CL/cl.h" #include @@ -15,5 +17,6 @@ struct ClDeviceInfoParam; namespace ClDeviceHelper { void getExtraDeviceInfo(const ClDevice &clDevice, cl_device_info paramName, ClDeviceInfoParam ¶m, const void *&src, size_t &size, size_t &retSize); +cl_device_feature_capabilities_intel getExtraCapabilities(); }; // namespace ClDeviceHelper } // namespace NEO diff --git a/opencl/source/helpers/cl_hw_helper_xehp_plus.inl 
b/opencl/source/helpers/cl_hw_helper_xehp_plus.inl new file mode 100644 index 0000000000..e382cc2d25 --- /dev/null +++ b/opencl/source/helpers/cl_hw_helper_xehp_plus.inl @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "opencl/source/helpers/cl_device_helpers.h" +#include "opencl/source/helpers/cl_hw_helper.h" + +namespace NEO { + +template +inline cl_command_queue_capabilities_intel ClHwHelperHw::getAdditionalDisabledQueueFamilyCapabilities(EngineGroupType type) const { + return 0; +} + +template +cl_ulong ClHwHelperHw::getKernelPrivateMemSize(const KernelInfo &kernelInfo) const { + const auto &kernelAttributes = kernelInfo.kernelDescriptor.kernelAttributes; + return (kernelAttributes.perThreadScratchSize[1] > 0) ? kernelAttributes.perThreadScratchSize[1] : kernelAttributes.perHwThreadPrivateMemorySize; +} + +template +cl_device_feature_capabilities_intel ClHwHelperHw::getSupportedDeviceFeatureCapabilities() const { + return ClDeviceHelper::getExtraCapabilities(); +} + +} // namespace NEO diff --git a/opencl/source/helpers/hardware_commands_helper_xehp_plus.inl b/opencl/source/helpers/hardware_commands_helper_xehp_plus.inl new file mode 100644 index 0000000000..049e7a35af --- /dev/null +++ b/opencl/source/helpers/hardware_commands_helper_xehp_plus.inl @@ -0,0 +1,171 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "shared/source/helpers/flat_batch_buffer_helper.h" +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/helpers/l3_range.h" +#include "shared/source/helpers/string.h" + +#include "opencl/source/cl_device/cl_device.h" +#include "opencl/source/command_queue/command_queue.h" +#include "opencl/source/helpers/hardware_commands_helper.h" +#include "opencl/source/kernel/kernel.h" + +#include "pipe_control_args.h" + +namespace NEO { + +template +typename HardwareCommandsHelper::INTERFACE_DESCRIPTOR_DATA 
*HardwareCommandsHelper::getInterfaceDescriptor( + const IndirectHeap &indirectHeap, + uint64_t offsetInterfaceDescriptor, + INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor) { + + return inlineInterfaceDescriptor; +} + +template +uint32_t HardwareCommandsHelper::additionalSizeRequiredDsh() { + return 0u; +} + +template +size_t HardwareCommandsHelper::getSizeRequiredCS() { + return 0; +} + +template +size_t HardwareCommandsHelper::getSizeRequiredForCacheFlush(const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress) { + UNRECOVERABLE_IF(true); + return 0; +} + +template +void HardwareCommandsHelper::sendMediaStateFlush( + LinearStream &commandStream, + size_t offsetInterfaceDescriptorData) { +} + +template +void HardwareCommandsHelper::sendMediaInterfaceDescriptorLoad( + LinearStream &commandStream, + size_t offsetInterfaceDescriptorData, + size_t sizeInterfaceDescriptorData) { +} + +template +void HardwareCommandsHelper::programPerThreadData( + size_t &sizePerThreadData, + const bool &localIdsGenerationByRuntime, + LinearStream &ioh, + uint32_t &simd, + uint32_t &numChannels, + const size_t localWorkSize[3], + Kernel &kernel, + size_t &sizePerThreadDataTotal, + size_t &localWorkItems, + uint32_t rootDeviceIndex) { + if (localIdsGenerationByRuntime) { + constexpr uint32_t grfSize = sizeof(typename GfxFamily::GRF); + sendPerThreadData( + ioh, + simd, + grfSize, + numChannels, + std::array{{static_cast(localWorkSize[0]), static_cast(localWorkSize[1]), static_cast(localWorkSize[2])}}, + {{0u, 1u, 2u}}, + kernel.usesOnlyImages()); + + updatePerThreadDataTotal(sizePerThreadData, simd, numChannels, sizePerThreadDataTotal, localWorkItems); + } +} + +template +size_t HardwareCommandsHelper::sendCrossThreadData( + IndirectHeap &indirectHeap, + Kernel &kernel, + bool inlineDataProgrammingRequired, + WALKER_TYPE *walkerCmd, + uint32_t &sizeCrossThreadData) { + + indirectHeap.align(WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); + + auto 
offsetCrossThreadData = indirectHeap.getUsed(); + char *dest = nullptr; + char *src = kernel.getCrossThreadData(); + + using InlineData = typename GfxFamily::INLINE_DATA; + using GRF = typename GfxFamily::GRF; + uint32_t inlineDataSize = sizeof(InlineData); + uint32_t sizeToCopy = sizeCrossThreadData; + if (inlineDataProgrammingRequired == true) { + sizeToCopy = std::min(inlineDataSize, sizeCrossThreadData); + dest = reinterpret_cast(walkerCmd->getInlineDataPointer()); + memcpy_s(dest, sizeToCopy, kernel.getCrossThreadData(), sizeToCopy); + auto offset = std::min(inlineDataSize, sizeCrossThreadData); + sizeCrossThreadData -= offset; + src += offset; + } + + if (sizeCrossThreadData > 0) { + dest = static_cast(indirectHeap.getSpace(sizeCrossThreadData)); + memcpy_s(dest, sizeCrossThreadData, src, sizeCrossThreadData); + } + + if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { + FlatBatchBufferHelper::fixCrossThreadDataInfo(kernel.getPatchInfoDataList(), offsetCrossThreadData, indirectHeap.getGraphicsAllocation()->getGpuAddress()); + } + + return offsetCrossThreadData + static_cast(is64bit ? indirectHeap.getHeapGpuStartOffset() : indirectHeap.getHeapGpuBase()); +} + +template +bool HardwareCommandsHelper::resetBindingTablePrefetch(Kernel &kernel) { + return kernel.isSchedulerKernel; +} + +template +void HardwareCommandsHelper::setInterfaceDescriptorOffset( + WALKER_TYPE *walkerCmd, + uint32_t &interfaceDescriptorIndex) { +} + +template +void HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress) { + // 1. make sure previous kernel finished + PipeControlArgs args; + auto &hardwareInfo = commandQueue.getDevice().getHardwareInfo(); + args.adjustArgs(hardwareInfo); + + MemorySynchronizationCommands::addPipeControl(*commandStream, args); + + // 2. 
flush all affected L3 lines + if constexpr (GfxFamily::isUsingL3Control) { + StackVec allocationsForCacheFlush; + kernel->getAllocationsForCacheFlush(allocationsForCacheFlush); + StackVec subranges; + for (GraphicsAllocation *alloc : allocationsForCacheFlush) { + coverRangeExact(alloc->getGpuAddress(), alloc->getUnderlyingBufferSize(), subranges, GfxFamily::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION); + } + for (size_t subrangeNumber = 0; subrangeNumber < subranges.size(); subrangeNumber += maxFlushSubrangeCount) { + size_t rangeCount = subranges.size() <= subrangeNumber + maxFlushSubrangeCount ? subranges.size() - subrangeNumber : maxFlushSubrangeCount; + Range range = CreateRange(subranges.begin() + subrangeNumber, rangeCount); + uint64_t postSyncAddressToFlush = 0; + if (rangeCount < maxFlushSubrangeCount || subranges.size() - subrangeNumber - maxFlushSubrangeCount == 0) { + postSyncAddressToFlush = postSyncAddress; + } + + flushGpuCache(commandStream, range, postSyncAddressToFlush, hardwareInfo); + } + } else { + UNUSED_VARIABLE(postSyncAddress); + } +} + +} // namespace NEO diff --git a/opencl/source/mem_obj/CMakeLists.txt b/opencl/source/mem_obj/CMakeLists.txt index 7b1cce932c..147e6c35ac 100644 --- a/opencl/source/mem_obj/CMakeLists.txt +++ b/opencl/source/mem_obj/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2018-2020 Intel Corporation +# Copyright (C) 2018-2021 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -28,6 +28,12 @@ set(RUNTIME_SRCS_MEM_OBJ ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/image_ext.inl ) +if(SUPPORT_XEHP_PLUS) + list(APPEND RUNTIME_SRCS_MEM_OBJ + ${CMAKE_CURRENT_SOURCE_DIR}/image_xehp_plus.inl + ) +endif() + target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_MEM_OBJ}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_MEM_OBJ ${RUNTIME_SRCS_MEM_OBJ}) add_subdirectories() diff --git a/opencl/source/mem_obj/image_xehp_plus.inl b/opencl/source/mem_obj/image_xehp_plus.inl new file 
mode 100644 index 0000000000..f74ca15423 --- /dev/null +++ b/opencl/source/mem_obj/image_xehp_plus.inl @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2016-2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +namespace NEO { + +template <> +void ImageHw::setMediaSurfaceRotation(void *memory) { + using MEDIA_SURFACE_STATE = typename Family::MEDIA_SURFACE_STATE; + using SURFACE_FORMAT = typename MEDIA_SURFACE_STATE::SURFACE_FORMAT; + + auto surfaceState = reinterpret_cast(memory); + + surfaceState->setRotation(MEDIA_SURFACE_STATE::ROTATION_NO_ROTATION_OR_0_DEGREE); + surfaceState->setXOffset(0); + surfaceState->setYOffset(0); +} + +template <> +void ImageHw::setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) { + using MEDIA_SURFACE_STATE = typename Family::MEDIA_SURFACE_STATE; + using SURFACE_FORMAT = typename MEDIA_SURFACE_STATE::SURFACE_FORMAT; + + auto surfaceState = reinterpret_cast(memory); + + surfaceState->setSurfaceMemoryObjectControlStateIndexToMocsTables(value); +} + +template <> +void ImageHw::appendSurfaceStateParams(Family::RENDER_SURFACE_STATE *surfaceState, uint32_t rootDeviceIndex, bool useGlobalAtomics) { + auto imageCtxType = this->context->peekContextType(); + + bool enableMultiGpuPartialWrites = (imageCtxType != ContextType::CONTEXT_TYPE_SPECIALIZED) && (context->containsMultipleSubDevices(rootDeviceIndex)); + + bool enableMultiGpuAtomics = enableMultiGpuPartialWrites; + + if (DebugManager.flags.EnableMultiGpuAtomicsOptimization.get()) { + enableMultiGpuAtomics &= useGlobalAtomics; + } + + surfaceState->setDisableSupportForMultiGpuAtomics(!enableMultiGpuAtomics); + surfaceState->setDisableSupportForMultiGpuPartialWrites(!enableMultiGpuPartialWrites); + + if (DebugManager.flags.ForceMultiGpuAtomics.get() != -1) { + surfaceState->setDisableSupportForMultiGpuAtomics(!!DebugManager.flags.ForceMultiGpuAtomics.get()); + } + if (DebugManager.flags.ForceMultiGpuPartialWrites.get() != -1) { + 
surfaceState->setDisableSupportForMultiGpuPartialWrites(!!DebugManager.flags.ForceMultiGpuPartialWrites.get()); + } +} + +} // namespace NEO diff --git a/opencl/source/memory_manager/CMakeLists.txt b/opencl/source/memory_manager/CMakeLists.txt index 75ece7651e..935b52da3a 100644 --- a/opencl/source/memory_manager/CMakeLists.txt +++ b/opencl/source/memory_manager/CMakeLists.txt @@ -9,6 +9,7 @@ set(RUNTIME_SRCS_MEMORY_MANAGER ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/compression_selector_ocl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpu_page_fault_manager_memory_sync.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_surface.h + ${CMAKE_CURRENT_SOURCE_DIR}/resource_surface.h ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_MEMORY_MANAGER}) diff --git a/opencl/source/memory_manager/resource_surface.h b/opencl/source/memory_manager/resource_surface.h new file mode 100644 index 0000000000..2f6fdbfb30 --- /dev/null +++ b/opencl/source/memory_manager/resource_surface.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/memory_manager/surface.h" + +#include "opencl/extensions/public/cl_ext_private.h" + +namespace NEO { +class ResourceSurface : public GeneralSurface { + public: + ResourceSurface(GraphicsAllocation *gfxAlloc, cl_resource_barrier_type type, cl_resource_memory_scope scope) : GeneralSurface(gfxAlloc), resourceType(type), resourceScope(scope) {} + ~ResourceSurface() override = default; + + GraphicsAllocation *getGraphicsAllocation() { + return gfxAllocation; + } + + cl_resource_barrier_type resourceType; + cl_resource_memory_scope resourceScope; +}; +} // namespace NEO \ No newline at end of file diff --git a/opencl/source/os_interface/linux/CMakeLists.txt b/opencl/source/os_interface/linux/CMakeLists.txt index db932a2675..c8d2fecf52 100644 --- a/opencl/source/os_interface/linux/CMakeLists.txt +++ b/opencl/source/os_interface/linux/CMakeLists.txt @@ -18,6 +18,13 @@ 
set(RUNTIME_SRCS_OS_INTERFACE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/performance_counters_linux.cpp ${CMAKE_CURRENT_SOURCE_DIR}/performance_counters_linux.h ) + +if(SUPPORT_XEHP_PLUS) + list(APPEND RUNTIME_SRCS_OS_INTERFACE_LINUX + ${CMAKE_CURRENT_SOURCE_DIR}/drm_command_stream_xehp_plus.inl + ) +endif() + if(UNIX) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_OS_INTERFACE_LINUX}) endif() diff --git a/opencl/source/os_interface/linux/drm_command_stream_xehp_plus.inl b/opencl/source/os_interface/linux/drm_command_stream_xehp_plus.inl new file mode 100644 index 0000000000..8420101cbd --- /dev/null +++ b/opencl/source/os_interface/linux/drm_command_stream_xehp_plus.inl @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2016-2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/os_interface/linux/drm_allocation.h" +#include "shared/source/os_interface/linux/os_context_linux.h" + +#include "opencl/source/os_interface/linux/drm_command_stream.h" + +namespace NEO { + +template +void DrmCommandStreamReceiver::flushInternal(const BatchBuffer &batchBuffer, const ResidencyContainer &allocationsForResidency) { + auto useImmediateExt = this->drm->isDirectSubmissionActive(); + + if (DebugManager.flags.EnableImmediateVmBindExt.get() != -1) { + useImmediateExt = DebugManager.flags.EnableImmediateVmBindExt.get(); + } + + if (useImmediateExt) { + auto osContextLinux = static_cast(this->osContext); + osContextLinux->waitForPagingFence(); + } + + auto &drmContextIds = static_cast(osContext)->getDrmContextIds(); + + uint32_t contextIndex = 0; + for (auto tileIterator = 0u; tileIterator < this->osContext->getDeviceBitfield().size(); tileIterator++) { + if (this->osContext->getDeviceBitfield().test(tileIterator)) { + if (DebugManager.flags.ForceExecutionTile.get() != -1 && this->osContext->getDeviceBitfield().count() > 1) { + tileIterator = contextIndex = DebugManager.flags.ForceExecutionTile.get(); + } + + 
this->processResidency(allocationsForResidency, tileIterator); + if (DebugManager.flags.PrintDeviceAndEngineIdOnSubmission.get()) { + printf("Drm Submission of contextIndex: %u, with context id %u\n", contextIndex, drmContextIds[contextIndex]); + } + + this->exec(batchBuffer, tileIterator, drmContextIds[contextIndex]); + + contextIndex++; + + if (DebugManager.flags.EnableWalkerPartition.get() == 0 || batchBuffer.useSingleSubdevice) { + return; + } + } + } +} + +template +int DrmCommandStreamReceiver::waitUserFence(uint32_t waitValue) { + int ret = 0; + uint64_t tagAddress = castToUint64(const_cast(getTagAddress())); + if (useContextForUserFenceWait) { + for (auto tileIterator = 0u; tileIterator < this->osContext->getDeviceBitfield().size(); tileIterator++) { + uint32_t ctxId = 0u; + if (this->osContext->getDeviceBitfield().test(tileIterator)) { + ctxId = static_cast(osContext)->getDrmContextIds()[tileIterator]; + ret |= this->drm->waitUserFence(ctxId, tagAddress, waitValue, Drm::ValueWidth::U32, kmdWaitTimeout, 0u); + } + } + } else { + ret = this->drm->waitUserFence(0u, tagAddress, waitValue, Drm::ValueWidth::U32, kmdWaitTimeout, 0u); + } + + return ret; +} + +} // namespace NEO diff --git a/opencl/source/xe_hp_core/CMakeLists.txt b/opencl/source/xe_hp_core/CMakeLists.txt new file mode 100644 index 0000000000..f0de29fe34 --- /dev/null +++ b/opencl/source/xe_hp_core/CMakeLists.txt @@ -0,0 +1,9 @@ +# +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT +# + +if(SUPPORT_XE_HP_CORE) + add_subdirectories() +endif() diff --git a/opencl/source/xe_hp_core/aub_command_stream_receiver_xe_hp_core.cpp b/opencl/source/xe_hp_core/aub_command_stream_receiver_xe_hp_core.cpp new file mode 100644 index 0000000000..0dcfb968b0 --- /dev/null +++ b/opencl/source/xe_hp_core/aub_command_stream_receiver_xe_hp_core.cpp @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include 
"shared/source/helpers/array_count.h" +#include "shared/source/helpers/populate_factory.h" + +#include "opencl/source/command_stream/aub_command_stream_receiver_hw_xehp_plus.inl" + +namespace NEO { + +typedef XeHpFamily Family; +static auto gfxCore = IGFX_XE_HP_CORE; + +template <> +void populateFactoryTable>() { + extern AubCommandStreamReceiverCreateFunc aubCommandStreamReceiverFactory[IGFX_MAX_CORE]; + UNRECOVERABLE_IF(!isInRange(gfxCore, aubCommandStreamReceiverFactory)); + aubCommandStreamReceiverFactory[gfxCore] = AUBCommandStreamReceiverHw::create; +} + +template class AUBCommandStreamReceiverHw; +} // namespace NEO diff --git a/opencl/source/xe_hp_core/aub_mem_dump_xe_hp_core.cpp b/opencl/source/xe_hp_core/aub_mem_dump_xe_hp_core.cpp new file mode 100644 index 0000000000..d1952913e1 --- /dev/null +++ b/opencl/source/xe_hp_core/aub_mem_dump_xe_hp_core.cpp @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/aub/aub_helper_xehp_plus.inl" + +#include "aub_mem_dump.h" + +namespace NEO { +struct XeHpFamily; +using Family = NEO::XeHpFamily; +constexpr static auto deviceValue = AubMemDump::DeviceValues::XeHP_SDV; + +template class AubHelperHw; +} // namespace NEO + +#include "shared/source/aub_mem_dump/aub_mem_dump_xehp_plus.inl" diff --git a/opencl/source/xe_hp_core/buffer_xe_hp_core.cpp b/opencl/source/xe_hp_core/buffer_xe_hp_core.cpp new file mode 100644 index 0000000000..465d543597 --- /dev/null +++ b/opencl/source/xe_hp_core/buffer_xe_hp_core.cpp @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/xe_hp_core/hw_cmds.h" + +#include "opencl/source/mem_obj/buffer_base.inl" + +namespace NEO { + +typedef XeHpFamily Family; +static auto gfxCore = IGFX_XE_HP_CORE; + +template class BufferHw; + +#include "opencl/source/mem_obj/buffer_factory_init.inl" +} // namespace NEO diff --git 
a/opencl/source/xe_hp_core/cl_hw_helper_xe_hp_core.cpp b/opencl/source/xe_hp_core/cl_hw_helper_xe_hp_core.cpp new file mode 100644 index 0000000000..dbac128590 --- /dev/null +++ b/opencl/source/xe_hp_core/cl_hw_helper_xe_hp_core.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/helpers/populate_factory.h" + +#include "opencl/source/context/context.h" +#include "opencl/source/helpers/cl_hw_helper_base.inl" +#include "opencl/source/helpers/cl_hw_helper_xehp_plus.inl" + +#include "hw_cmds.h" + +namespace NEO { + +using Family = XeHpFamily; +static auto gfxCore = IGFX_XE_HP_CORE; + +template <> +void populateFactoryTable>() { + extern ClHwHelper *clHwHelperFactory[IGFX_MAX_CORE]; + clHwHelperFactory[gfxCore] = &ClHwHelperHw::get(); +} + +template <> +bool ClHwHelperHw::requiresNonAuxMode(const ArgDescPointer &argAsPtr) const { + if (DebugManager.flags.EnableStatelessCompression.get()) { + return false; + } else { + return !argAsPtr.isPureStateful(); + } +} + +template <> +bool ClHwHelperHw::requiresAuxResolves(const KernelInfo &kernelInfo) const { + if (DebugManager.flags.EnableStatelessCompression.get()) { + return false; + } else { + return hasStatelessAccessToBuffer(kernelInfo); + } +} + +template <> +inline bool ClHwHelperHw::allowRenderCompressionForContext(const ClDevice &clDevice, const Context &context) const { + auto rootDeviceIndex = clDevice.getRootDeviceIndex(); + auto &hwInfo = clDevice.getHardwareInfo(); + if (context.containsMultipleSubDevices(rootDeviceIndex) && HwHelperHw::get().isWorkaroundRequired(REVISION_A0, REVISION_A1, hwInfo)) { + return false; + } + return true; +} + +template <> +bool ClHwHelperHw::isSupportedKernelThreadArbitrationPolicy() const { return false; } + +template <> +std::vector ClHwHelperHw::getSupportedThreadArbitrationPolicies() const { + return std::vector{}; +} + +template 
<> +cl_version ClHwHelperHw::getDeviceIpVersion(const HardwareInfo &hwInfo) const { + return makeDeviceIpVersion(12, 5, makeDeviceRevision(hwInfo)); +} + +template class ClHwHelperHw; + +} // namespace NEO diff --git a/opencl/source/xe_hp_core/command_queue_xe_hp_core.cpp b/opencl/source/xe_hp_core/command_queue_xe_hp_core.cpp new file mode 100644 index 0000000000..1eda1787a2 --- /dev/null +++ b/opencl/source/xe_hp_core/command_queue_xe_hp_core.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/populate_factory.h" +#include "shared/source/memory_manager/unified_memory_manager.h" + +#include "opencl/source/command_queue/command_queue_hw.h" +#include "opencl/source/command_queue/enqueue_resource_barrier.h" + +#include "enqueue_init_dispatch_globals.h" + +namespace NEO { + +using Family = XeHpFamily; +static auto gfxCore = IGFX_XE_HP_CORE; +} // namespace NEO + +#include "opencl/source/command_queue/command_queue_hw_xehp_plus.inl" + +namespace NEO { +template <> +void populateFactoryTable>() { + extern CommandQueueCreateFunc commandQueueFactory[IGFX_MAX_CORE]; + commandQueueFactory[gfxCore] = CommandQueueHw::create; +} + +} // namespace NEO + +template class NEO::CommandQueueHw; diff --git a/opencl/source/xe_hp_core/command_stream_receiver_simulated_common_hw_xe_hp_core.cpp b/opencl/source/xe_hp_core/command_stream_receiver_simulated_common_hw_xe_hp_core.cpp new file mode 100644 index 0000000000..db7e385b97 --- /dev/null +++ b/opencl/source/xe_hp_core/command_stream_receiver_simulated_common_hw_xe_hp_core.cpp @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "opencl/source/command_stream/command_stream_receiver_simulated_common_hw_xehp_plus.inl" + +namespace NEO { +typedef XeHpFamily Family; + +template class CommandStreamReceiverSimulatedCommonHw; +} // namespace NEO diff --git 
a/opencl/source/xe_hp_core/enable_family_full_ocl_xe_hp_core.cpp b/opencl/source/xe_hp_core/enable_family_full_ocl_xe_hp_core.cpp new file mode 100644 index 0000000000..3f087bc2c4 --- /dev/null +++ b/opencl/source/xe_hp_core/enable_family_full_ocl_xe_hp_core.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_stream/command_stream_receiver_hw.h" +#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h" +#include "shared/source/helpers/populate_factory.h" + +#include "opencl/source/command_queue/command_queue_hw.h" +#include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" +#include "opencl/source/device_queue/device_queue_hw.h" +#include "opencl/source/helpers/cl_hw_helper.h" +#include "opencl/source/mem_obj/buffer.h" +#include "opencl/source/mem_obj/image.h" +#include "opencl/source/sampler/sampler.h" + +namespace NEO { + +typedef XeHpFamily Family; + +struct EnableOCLXeHpCore { + EnableOCLXeHpCore() { + populateFactoryTable>(); + populateFactoryTable>(); + populateFactoryTable>(); + populateFactoryTable>(); + populateFactoryTable>(); + populateFactoryTable>(); + populateFactoryTable>(); + populateFactoryTable>(); + } +}; + +static EnableOCLXeHpCore enable; +} // namespace NEO diff --git a/opencl/source/xe_hp_core/experimental_command_buffer_xe_hp_core.cpp b/opencl/source/xe_hp_core/experimental_command_buffer_xe_hp_core.cpp new file mode 100644 index 0000000000..680f1616c4 --- /dev/null +++ b/opencl/source/xe_hp_core/experimental_command_buffer_xe_hp_core.cpp @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_stream/experimental_command_buffer.inl" + +namespace NEO { +typedef XeHpFamily GfxFamily; + +template void ExperimentalCommandBuffer::injectBufferStart(LinearStream &parentStream, size_t cmdBufferOffset); +template size_t 
ExperimentalCommandBuffer::getRequiredInjectionSize() noexcept; + +template size_t ExperimentalCommandBuffer::programExperimentalCommandBuffer(); +template size_t ExperimentalCommandBuffer::getTotalExperimentalSize() noexcept; + +template void ExperimentalCommandBuffer::addTimeStampPipeControl(); +template size_t ExperimentalCommandBuffer::getTimeStampPipeControlSize() noexcept; + +template void ExperimentalCommandBuffer::addExperimentalCommands(); +template size_t ExperimentalCommandBuffer::getExperimentalCommandsSize() noexcept; +} // namespace NEO diff --git a/opencl/source/xe_hp_core/gpgpu_walker_xe_hp_core.cpp b/opencl/source/xe_hp_core/gpgpu_walker_xe_hp_core.cpp new file mode 100644 index 0000000000..dbcd74458a --- /dev/null +++ b/opencl/source/xe_hp_core/gpgpu_walker_xe_hp_core.cpp @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "opencl/source/command_queue/gpgpu_walker_xehp_plus.inl" +#include "opencl/source/command_queue/hardware_interface_xehp_plus.inl" + +namespace NEO { + +template class GpgpuWalkerHelper; + +template class HardwareInterface; + +template struct EnqueueOperation; + +} // namespace NEO diff --git a/opencl/source/xe_hp_core/gtpin_setup_xe_hp_core.cpp b/opencl/source/xe_hp_core/gtpin_setup_xe_hp_core.cpp new file mode 100644 index 0000000000..473b810a6e --- /dev/null +++ b/opencl/source/xe_hp_core/gtpin_setup_xe_hp_core.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "opencl/source/gtpin/gtpin_hw_helper.h" +#include "opencl/source/gtpin/gtpin_hw_helper.inl" +#include "opencl/source/gtpin/gtpin_hw_helper_xehp_plus.inl" + +#include "ocl_igc_shared/gtpin/gtpin_ocl_interface.h" + +namespace NEO { + +extern GTPinHwHelper *gtpinHwHelperFactory[IGFX_MAX_CORE]; + +typedef XeHpFamily Family; +static const auto gfxFamily = IGFX_XE_HP_CORE; + +template <> +uint32_t GTPinHwHelperHw::getGenVersion() { + 
return gtpin::GTPIN_XEHP_CORE; +} + +template class GTPinHwHelperHw; + +struct GTPinEnableXeHpCore { + GTPinEnableXeHpCore() { + gtpinHwHelperFactory[gfxFamily] = >PinHwHelperHw::get(); + } +}; + +static GTPinEnableXeHpCore gtpinEnable; + +} // namespace NEO diff --git a/opencl/source/xe_hp_core/hardware_commands_helper_xe_hp_core.cpp b/opencl/source/xe_hp_core/hardware_commands_helper_xe_hp_core.cpp new file mode 100644 index 0000000000..58329c9a1b --- /dev/null +++ b/opencl/source/xe_hp_core/hardware_commands_helper_xe_hp_core.cpp @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/cache_flush_xehp_plus.inl" +#include "shared/source/xe_hp_core/hw_cmds.h" + +#include "opencl/source/helpers/hardware_commands_helper.h" +#include "opencl/source/helpers/hardware_commands_helper_base.inl" +#include "opencl/source/helpers/hardware_commands_helper_xehp_plus.inl" + +namespace NEO { + +template <> +void HardwareCommandsHelper::setGrfInfo(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const Kernel &kernel, + const size_t &sizeCrossThreadData, const size_t &sizePerThreadData) { +} + +template struct HardwareCommandsHelper; +} // namespace NEO diff --git a/opencl/source/xe_hp_core/hw_info_xe_hp_core.cpp b/opencl/source/xe_hp_core/hw_info_xe_hp_core.cpp new file mode 100644 index 0000000000..a85aeb84d9 --- /dev/null +++ b/opencl/source/xe_hp_core/hw_info_xe_hp_core.cpp @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#ifdef SUPPORT_XEHP +#include "hw_info_xehp.inl" +#endif + +namespace NEO { +const char *GfxFamilyMapper::name = "XE_HP_CORE"; +} // namespace NEO diff --git a/opencl/source/xe_hp_core/hw_info_xehp.inl b/opencl/source/xe_hp_core/hw_info_xehp.inl new file mode 100644 index 0000000000..607156b142 --- /dev/null +++ b/opencl/source/xe_hp_core/hw_info_xehp.inl @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2021 Intel 
Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/aub_mem_dump/definitions/aub_services.h" +#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/helpers/constants.h" +#include "shared/source/helpers/hw_info.h" +#include "shared/source/xe_hp_core/hw_cmds.h" + +#include "engine_node.h" + +namespace NEO { + +const char *HwMapper::abbreviation = "xehp"; + +bool isSimulationXEHP(unsigned short deviceId) { + return false; +}; + +const PLATFORM XEHP::platform = { + IGFX_XE_HP_SDV, + PCH_UNKNOWN, + IGFX_XE_HP_CORE, + IGFX_XE_HP_CORE, + PLATFORM_NONE, // default init + 0, // usDeviceID + 0, // usRevId. 0 sets the stepping to A0 + 0, // usDeviceID_PCH + 0, // usRevId_PCH + GTTYPE_UNDEFINED}; + +const RuntimeCapabilityTable XEHP::capabilityTable{ + EngineDirectSubmissionInitVec{ + {aub_stream::ENGINE_RCS, {true, true, false, true}}, + {aub_stream::ENGINE_CCS, {true, true, false, true}}}, // directSubmissionEngines + {0, 0, 0, false, false, false}, // kmdNotifyProperties + MemoryConstants::max48BitAddress, // gpuAddressSpace + 83.333, // defaultProfilingTimerResolution + MemoryConstants::pageSize, // requiredPreemptionSurfaceSize + &isSimulationXEHP, // isSimulation + PreemptionMode::ThreadGroup, // defaultPreemptionMode + aub_stream::ENGINE_CCS, // defaultEngineType + 0, // maxRenderFrequency + 30, // clVersionSupport + CmdServicesMemTraceVersion::DeviceValues::XeHP_SDV, // aubDeviceId + 0, // extraQuantityThreadsPerEU + 64, // slmSize + sizeof(XEHP::GRF), // grfSize + 36u, // timestampValidBits + 32u, // kernelTimestampValidBits + false, // blitterOperationsSupported + true, // ftrSupportsInteger64BitAtomics + true, // ftrSupportsFP64 + true, // ftrSupports64BitMath + true, // ftrSvm + false, // ftrSupportsCoherency + false, // ftrSupportsVmeAvcTextureSampler + false, // ftrSupportsVmeAvcPreemption + false, // ftrRenderCompressedBuffers + false, // ftrRenderCompressedImages + true, // ftr64KBpages + 
true, // instrumentationEnabled + true, // forceStatelessCompilationFor32Bit + "core", // platformType + "", // deviceName + true, // sourceLevelDebuggerSupported + false, // supportsVme + true, // supportCacheFlushAfterWalker + true, // supportsImages + false, // supportsDeviceEnqueue + false, // supportsPipes + true, // supportsOcl21Features + false, // supportsOnDemandPageFaults + false, // supportsIndependentForwardProgress + false, // hostPtrTrackingEnabled + true, // levelZeroSupported + false, // isIntegratedDevice + true, // supportsMediaBlock + true // fusedEuEnabled +}; + +WorkaroundTable XEHP::workaroundTable = {}; +FeatureTable XEHP::featureTable = {}; + +void XEHP::setupFeatureAndWorkaroundTable(HardwareInfo *hwInfo) { + FeatureTable *featureTable = &hwInfo->featureTable; + WorkaroundTable *workaroundTable = &hwInfo->workaroundTable; + + featureTable->ftrL3IACoherency = true; + featureTable->ftrFlatPhysCCS = true; + featureTable->ftrPPGTT = true; + featureTable->ftrSVM = true; + featureTable->ftrIA32eGfxPTEs = true; + featureTable->ftrStandardMipTailFormat = true; + featureTable->ftrTranslationTable = true; + featureTable->ftrUserModeTranslationTable = true; + featureTable->ftrTileMappedResource = true; + featureTable->ftrEnableGuC = true; + featureTable->ftrFbc = true; + featureTable->ftrFbc2AddressTranslation = true; + featureTable->ftrFbcBlitterTracking = true; + featureTable->ftrAstcHdr2D = true; + featureTable->ftrAstcLdr2D = true; + + featureTable->ftr3dMidBatchPreempt = true; + featureTable->ftrGpGpuMidBatchPreempt = true; + featureTable->ftrGpGpuThreadGroupLevelPreempt = true; + featureTable->ftrPerCtxtPreemptionGranularityControl = true; + + featureTable->ftrTileY = false; + featureTable->ftrLocalMemory = true; + featureTable->ftrLinearCCS = true; + featureTable->ftrE2ECompression = true; + featureTable->ftrCCSNode = true; + featureTable->ftrCCSRing = true; + featureTable->ftrMultiTileArch = true; + featureTable->ftrCCSMultiInstance = true; + 
+ workaroundTable->wa4kAlignUVOffsetNV12LinearSurface = true; + workaroundTable->waEnablePreemptionGranularityControlByUMD = true; +}; + +const HardwareInfo XEHP_CONFIG::hwInfo = { + &XEHP::platform, + &XEHP::featureTable, + &XEHP::workaroundTable, + &XEHP_CONFIG::gtSystemInfo, + XEHP::capabilityTable, +}; +GT_SYSTEM_INFO XEHP_CONFIG::gtSystemInfo = {0}; +void XEHP_CONFIG::setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable) { + XEHP_CONFIG::setupHardwareInfoMultiTile(hwInfo, setupFeatureTableAndWorkaroundTable, false); +} + +void XEHP_CONFIG::setupHardwareInfoMultiTile(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, bool setupMultiTile) { + GT_SYSTEM_INFO *gtSysInfo = &hwInfo->gtSystemInfo; + gtSysInfo->CsrSizeInMb = 8; + gtSysInfo->IsL3HashModeEnabled = false; + gtSysInfo->IsDynamicallyPopulated = false; + + if (setupFeatureTableAndWorkaroundTable) { + XEHP::setupFeatureAndWorkaroundTable(hwInfo); + } +}; + +const HardwareInfo XEHP::hwInfo = XEHP_CONFIG::hwInfo; +const uint64_t XEHP::defaultHardwareInfoConfig = 0; + +void setupXEHPHardwareInfoImpl(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, uint64_t hwInfoConfig) { + if (hwInfoConfig == 0x0) { + // Default config + XEHP_CONFIG::setupHardwareInfo(hwInfo, setupFeatureTableAndWorkaroundTable); + } else { + UNRECOVERABLE_IF(true); + } +} + +void (*XEHP::setupHardwareInfo)(HardwareInfo *, bool, const uint64_t) = setupXEHPHardwareInfoImpl; +} // namespace NEO diff --git a/opencl/source/xe_hp_core/image_xe_hp_core.cpp b/opencl/source/xe_hp_core/image_xe_hp_core.cpp new file mode 100644 index 0000000000..087a96d87a --- /dev/null +++ b/opencl/source/xe_hp_core/image_xe_hp_core.cpp @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/device/device.h" +#include "shared/source/xe_hp_core/hw_cmds.h" + +#include "opencl/source/context/context.h" +#include 
"opencl/source/mem_obj/image.inl" + +#include "gmm_client_context.h" + +namespace NEO { + +using Family = XeHpFamily; +static auto gfxCore = IGFX_XE_HP_CORE; + +} // namespace NEO + +#include "opencl/source/mem_obj/image_xehp_plus.inl" + +namespace NEO { +// clang-format off +#include "opencl/source/mem_obj/image_tgllp_plus.inl" +#include "opencl/source/mem_obj/image_factory_init.inl" +// clang-format on +} // namespace NEO diff --git a/opencl/source/xe_hp_core/linux/command_stream_receiver_xe_hp_core.cpp b/opencl/source/xe_hp_core/linux/command_stream_receiver_xe_hp_core.cpp new file mode 100644 index 0000000000..18ad88a0ef --- /dev/null +++ b/opencl/source/xe_hp_core/linux/command_stream_receiver_xe_hp_core.cpp @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl" +#include "opencl/source/os_interface/linux/device_command_stream.inl" +#include "opencl/source/os_interface/linux/drm_command_stream.inl" +#include "opencl/source/os_interface/linux/drm_command_stream_xehp_plus.inl" + +namespace NEO { + +template class DeviceCommandStreamReceiver; +template class DrmCommandStreamReceiver; +template class CommandStreamReceiverWithAUBDump>; +} // namespace NEO diff --git a/opencl/source/xe_hp_core/sampler_xe_hp_core.cpp b/opencl/source/xe_hp_core/sampler_xe_hp_core.cpp new file mode 100644 index 0000000000..bbc708a172 --- /dev/null +++ b/opencl/source/xe_hp_core/sampler_xe_hp_core.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/xe_hp_core/hw_cmds_base.h" +using Family = NEO::XeHpFamily; +constexpr static auto gfxCore = IGFX_XE_HP_CORE; +#include "shared/source/debug_settings/debug_settings_manager.h" + +#include "opencl/source/cl_device/cl_device.h" +#include "opencl/source/context/context.h" +#include "opencl/source/sampler/sampler.h" +#include 
"opencl/source/sampler/sampler.inl" +namespace NEO { + +using SAMPLER_STATE = typename Family::SAMPLER_STATE; + +template <> +void SamplerHw::appendSamplerStateParams(SAMPLER_STATE *state, const HardwareInfo &hwInfo) { + if (DebugManager.flags.ForceSamplerLowFilteringPrecision.get()) { + state->setLowQualityFilter(SAMPLER_STATE::LOW_QUALITY_FILTER_ENABLE); + } +} + +#include "opencl/source/sampler/sampler_factory_init.inl" +} // namespace NEO diff --git a/opencl/source/xe_hp_core/state_compute_mode_helper_xe_hp_core.cpp b/opencl/source/xe_hp_core/state_compute_mode_helper_xe_hp_core.cpp new file mode 100644 index 0000000000..3ed1181c4d --- /dev/null +++ b/opencl/source/xe_hp_core/state_compute_mode_helper_xe_hp_core.cpp @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/state_compute_mode_helper.h" + +namespace NEO { +template <> +bool StateComputeModeHelper::isStateComputeModeRequired(const CsrSizeRequestFlags &csrSizeRequestFlags, bool isThreadArbitionPolicyProgrammed) { + return false; +} +} // namespace NEO diff --git a/opencl/source/xe_hp_core/tbx_command_stream_receiver_xe_hp_core.cpp b/opencl/source/xe_hp_core/tbx_command_stream_receiver_xe_hp_core.cpp new file mode 100644 index 0000000000..fb775e6445 --- /dev/null +++ b/opencl/source/xe_hp_core/tbx_command_stream_receiver_xe_hp_core.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h" +#include "shared/source/command_stream/tbx_command_stream_receiver_hw.inl" +#include "shared/source/helpers/array_count.h" +#include "shared/source/helpers/populate_factory.h" +#include "shared/source/memory_manager/memory_banks.h" +#include "shared/source/memory_manager/memory_pool.h" +#include "shared/source/xe_hp_core/hw_cmds.h" + +#include 
"opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl" + +namespace NEO { +typedef XeHpFamily Family; +static auto gfxCore = IGFX_XE_HP_CORE; + +template <> +void populateFactoryTable>() { + extern TbxCommandStreamReceiverCreateFunc tbxCommandStreamReceiverFactory[IGFX_MAX_CORE]; + UNRECOVERABLE_IF(!isInRange(gfxCore, tbxCommandStreamReceiverFactory)); + tbxCommandStreamReceiverFactory[gfxCore] = TbxCommandStreamReceiverHw::create; +} + +#include "opencl/source/command_stream/tbx_command_stream_receiver_xehp_plus.inl" + +template class TbxCommandStreamReceiverHw; +template class CommandStreamReceiverWithAUBDump>; +} // namespace NEO diff --git a/opencl/source/xe_hp_core/windows/command_stream_receiver_xe_hp_core.cpp b/opencl/source/xe_hp_core/windows/command_stream_receiver_xe_hp_core.cpp new file mode 100644 index 0000000000..f90b3bb3fb --- /dev/null +++ b/opencl/source/xe_hp_core/windows/command_stream_receiver_xe_hp_core.cpp @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "opencl/source/command_stream/command_stream_receiver_with_aub_dump.inl" +#include "opencl/source/os_interface/windows/device_command_stream.inl" +#include "opencl/source/os_interface/windows/wddm_device_command_stream.inl" + +namespace NEO { + +template class DeviceCommandStreamReceiver; +template class WddmCommandStreamReceiver; +template class CommandStreamReceiverWithAUBDump>; +} // namespace NEO diff --git a/opencl/source/xe_hp_core/windows/gmm_callbacks_xe_hp_core.cpp b/opencl/source/xe_hp_core/windows/gmm_callbacks_xe_hp_core.cpp new file mode 100644 index 0000000000..18592376ab --- /dev/null +++ b/opencl/source/xe_hp_core/windows/gmm_callbacks_xe_hp_core.cpp @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/xe_hp_core/hw_cmds.h" + +#include "opencl/source/helpers/windows/gmm_callbacks_tgllp_plus.inl" + +namespace 
NEO { +template struct DeviceCallbacks; +template struct TTCallbacks; +} // namespace NEO diff --git a/opencl/test/unit_test/command_queue/CMakeLists.txt b/opencl/test/unit_test/command_queue/CMakeLists.txt index d508619953..0a25a8a1c3 100644 --- a/opencl/test/unit_test/command_queue/CMakeLists.txt +++ b/opencl/test/unit_test/command_queue/CMakeLists.txt @@ -56,7 +56,7 @@ set(IGDRCL_SRCS_tests_command_queue ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_buffer_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_image_fixture.h ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_read_image_tests.cpp - ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/enqueue_resource_barier_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_resource_barier_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_svm_mem_copy_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_svm_mem_fill_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/enqueue_svm_tests.cpp diff --git a/opencl/test/unit_test/command_queue/enqueue_resource_barier_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_resource_barier_tests.cpp index 09fe2f6ed0..86fd8da8d0 100644 --- a/opencl/test/unit_test/command_queue/enqueue_resource_barier_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_resource_barier_tests.cpp @@ -1,10 +1,11 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ +#include "opencl/source/command_queue/resource_barrier.h" #include "opencl/test/unit_test/command_queue/command_enqueue_fixture.h" #include "test.h" @@ -13,10 +14,56 @@ using namespace NEO; using ResourceBarrierTest = Test; HWTEST_F(ResourceBarrierTest, givenNullArgsAndHWCommandQueueWhenEnqueueResourceBarrierCalledThenCorrectStatusReturned) { - auto retVal = pCmdQ->enqueueResourceBarrier( + cl_resource_barrier_descriptor_intel descriptor{}; + auto retVal = CL_INVALID_VALUE; + size_t bufferSize = MemoryConstants::pageSize; + std::unique_ptr buffer(Buffer::create( + &pCmdQ->getContext(), + 
CL_MEM_READ_WRITE, + bufferSize, nullptr, + retVal)); + descriptor.mem_object = buffer.get(); + descriptor.svm_allocation_pointer = nullptr; + + BarrierCommand barrierCommand(pCmdQ, &descriptor, 1); + auto surface = reinterpret_cast(barrierCommand.surfacePtrs.begin()[0]); + EXPECT_EQ(surface->getGraphicsAllocation(), buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex())); + + retVal = pCmdQ->enqueueResourceBarrier( + &barrierCommand, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); -} \ No newline at end of file +} + +HWTEST_F(ResourceBarrierTest, whenEnqueueResourceBarrierCalledThenUpdateQueueCompletionStamp) { + cl_resource_barrier_descriptor_intel descriptor{}; + auto retVal = CL_INVALID_VALUE; + size_t bufferSize = MemoryConstants::pageSize; + std::unique_ptr buffer(Buffer::create(&pCmdQ->getContext(), CL_MEM_READ_WRITE, bufferSize, nullptr, retVal)); + descriptor.mem_object = buffer.get(); + descriptor.svm_allocation_pointer = nullptr; + + BarrierCommand barrierCommand(pCmdQ, &descriptor, 1); + + auto previousTaskCount = pCmdQ->taskCount; + auto previousTaskLevel = pCmdQ->taskLevel; + pCmdQ->enqueueResourceBarrier(&barrierCommand, 0, nullptr, nullptr); + + bool resourceBarrierSupported = pCmdQ->isCacheFlushCommand(CL_COMMAND_RESOURCE_BARRIER); + + if (resourceBarrierSupported) { + EXPECT_EQ(pCmdQ->taskCount, previousTaskCount + 1); + } else { + EXPECT_EQ(pCmdQ->taskCount, previousTaskCount); + } + EXPECT_EQ(pCmdQ->taskLevel, previousTaskLevel); +} + +HWTEST_F(ResourceBarrierTest, whenBarierCommandCreatedWithInvalidSvmPointerThenExceptionIsThrown) { + cl_resource_barrier_descriptor_intel descriptor{}; + descriptor.svm_allocation_pointer = nullptr; + EXPECT_THROW(BarrierCommand barrierCommand(pCmdQ, &descriptor, 1), std::exception); +} diff --git a/opencl/test/unit_test/kernel/cache_flush_tests.inl b/opencl/test/unit_test/kernel/cache_flush_tests.inl index fa6fa37d72..6a42a8dd4b 100644 --- a/opencl/test/unit_test/kernel/cache_flush_tests.inl +++ 
b/opencl/test/unit_test/kernel/cache_flush_tests.inl @@ -14,8 +14,8 @@ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "opencl/source/command_queue/command_queue_hw.h" -#include "opencl/source/command_queue/embargo/resource_barrier.h" #include "opencl/source/command_queue/gpgpu_walker.h" +#include "opencl/source/command_queue/resource_barrier.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/fixtures/hello_world_fixture.h" #include "opencl/test/unit_test/helpers/cmd_buffer_validator.h" diff --git a/opencl/test/unit_test/os_interface/linux/drm_memory_info_tests_dg1.cpp b/opencl/test/unit_test/os_interface/linux/drm_memory_info_tests_dg1.cpp index ffe0b1e6f2..62befefba9 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_memory_info_tests_dg1.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_memory_info_tests_dg1.cpp @@ -85,10 +85,10 @@ TEST(MemoryInfo, givenMemoryInfoWithRegionsWhenGettingMemoryRegionClassAndInstan auto regionSize = memoryInfo->getMemoryRegionSize(MemoryBanks::MainBank); EXPECT_EQ(8 * GB, regionSize); - regionClassAndInstance = memoryInfo->getMemoryRegionClassAndInstance(MemoryBanks::Bank0); + regionClassAndInstance = memoryInfo->getMemoryRegionClassAndInstance(MemoryBanks::getBankForLocalMemory(0)); EXPECT_EQ(regionInfo[1].region.memory_class, regionClassAndInstance.memory_class); EXPECT_EQ(regionInfo[1].region.memory_instance, regionClassAndInstance.memory_instance); - regionSize = memoryInfo->getMemoryRegionSize(MemoryBanks::Bank0); + regionSize = memoryInfo->getMemoryRegionSize(MemoryBanks::getBankForLocalMemory(0)); EXPECT_EQ(16 * GB, regionSize); } diff --git a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp index 3de479ddfa..a9e0654747 100644 --- 
a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_allocate_in_device_pool_tests_dg1.cpp @@ -184,7 +184,7 @@ TEST_F(DrmMemoryManagerLocalMemoryTest, givenDrmMemoryManagerWhenCreateBufferObj auto bo = std::unique_ptr(memoryManager->createBufferObjectInMemoryRegion(&memoryManager->getDrm(0), gpuAddress, size, - (1 << (MemoryBanks::Bank0 - 1)), + (1 << (MemoryBanks::getBankForLocalMemory(0) - 1)), 1)); ASSERT_NE(nullptr, bo); @@ -1507,7 +1507,7 @@ TEST_F(DrmMemoryManagerTestDg1, givenDrmMemoryManagerWhenGetLocalMemorySizeIsCal auto memoryInfo = static_cast(drm->getMemoryInfo()); ASSERT_NE(nullptr, memoryInfo); - EXPECT_EQ(memoryInfo->getMemoryRegionSize(MemoryBanks::Bank0), memoryManager.getLocalMemorySize(0u, 0xF)); + EXPECT_EQ(memoryInfo->getMemoryRegionSize(MemoryBanks::getBankForLocalMemory(0)), memoryManager.getLocalMemorySize(0u, 0xF)); } TEST_F(DrmMemoryManagerTestDg1, givenDrmMemoryManagerWhenGetLocalMemorySizeIsCalledButMemoryInfoIsNotAvailableThenSizeZeroIsReturned) { diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index 32aec2b562..a4ade2d92b 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -242,6 +242,53 @@ ExperimentalEnableCustomLocalMemoryAlignment = 0 AlignLocalMemoryVaTo2MB = -1 EngineInstancedSubDevices = 0 OverrideTimestampPacketSize = -1 +CFEComputeOverdispatchDisable = -1 +CFEWeightedDispatchModeDisable = -1 +CFESingleSliceDispatchCCSMode = -1 +CleanStateInPreamble = 0 +CFENumberOfWalkers = -1 +CFEMaximumNumberOfThreads = -1 +CFEOverDispatchControl = -1 +CFELargeGRFThreadAdjustDisable = -1 +SynchronizeWalkerInWparidMode = -1 +EnableWalkerPartition = -1 +OverrideNumComputeUnitsForScratch = -1 +ForceWorkgroupSize1x1x1 = -1 +ForceThreadGroupDispatchSize = -1 +ForceStatelessL1CachingPolicy = -1 
+ForceMemoryBankIndexOverride = -1 +ExperimentalSynchronizeWithSemaphores = -1 +ExperimentalForceCrossAtomicSynchronization = -1 +EnableStatelessCompression = 0 +EnablePrivateScratchSlot1 = -1 +DisablePipeControlPrecedingPostSyncCommand = -1 +UseClearColorAllocationForBlitter = false +UseCachingPolicyForIndirectObjectHeap = -1 +OverrideMultiStoragePlacement = -1 +MultiTileIsaPlacement = -1 +FormatForStatelessCompressionWithUnifiedMemory = 0xF +ForceMultiGpuPartialWritesInComputeMode = -1 +ForceMultiGpuPartialWrites = -1 +ForceMultiGpuAtomicsInComputeMode = -1 +ForceMultiGpuAtomics = -1 +ForceBufferCompressionFormat = -1 +ExperimentalSetWalkerPartitionCount = 0 +EnableStatelessCompressionWithUnifiedMemory = 0 +EnableMultiGpuAtomicsOptimization = 1 +EnableHwGenerationLocalIds = -1 +WalkerPartitionPreferHighestDimension = -1 +SetMinimalPartitionSize = -1 +OverrideBlitterTargetMemory = -1 +OverrideBlitterMocs = -1 +GlobalSequencerFlushOnCopyEngine = false +ForceCompressionDisabledForCompressedBlitCopies = -1 +ExperimentalSetWalkerPartitionType = -1 +UseImmDataWriteModeOnPostSyncOperation = 0 +OverridePostSyncMocs = -1 +EnableImmediateVmBindExt = -1 +ForceExecutionTile = -1 +DisableCachingForHeaps = 0 +OverrideTimestampPacketSize = -1 ClDeviceGlobalMemSizeAvailablePercent = -1 DebugApiUsed = 0 ForceHostPointerImport = -1 @@ -249,5 +296,6 @@ OverrideMaxWorkGroupCount = -1 UseUmKmDataTranslator = 0 EnableUserFenceForCompletionWait = -1 EnableUserFenceUseCtxId = -1 +EnableResourceTags = 0 SetKmdWaitTimeout = -1 OverrideNotifyEnableForTagUpdatePostSync = -1 diff --git a/shared/source/CMakeLists.txt b/shared/source/CMakeLists.txt index a04399161c..2174495454 100644 --- a/shared/source/CMakeLists.txt +++ b/shared/source/CMakeLists.txt @@ -129,6 +129,7 @@ append_sources_from_properties(CORE_SOURCES if(WIN32) append_sources_from_properties(CORE_SOURCES NEO_CORE_GMM_HELPER_WINDOWS + NEO_CORE_HELPERS_GMM_CALLBACKS_WINDOWS NEO_CORE_DIRECT_SUBMISSION_WINDOWS 
NEO_CORE_OS_INTERFACE_WINDOWS NEO_CORE_OS_INTERFACE_WDDM @@ -146,6 +147,7 @@ else() ) if(NOT DISABLE_WDDM_LINUX) append_sources_from_properties(CORE_SOURCES + NEO_CORE_HELPERS_GMM_CALLBACKS_WINDOWS NEO_CORE_SRCS_HELPERS_WINDOWS NEO_CORE_GMM_HELPER_WINDOWS NEO_CORE_OS_INTERFACE_WDDM diff --git a/shared/source/aub/CMakeLists.txt b/shared/source/aub/CMakeLists.txt index 86b1a3aefb..12534b8ffd 100644 --- a/shared/source/aub/CMakeLists.txt +++ b/shared/source/aub/CMakeLists.txt @@ -18,5 +18,12 @@ set(NEO_CORE_AUB ${CMAKE_CURRENT_SOURCE_DIR}/aub_subcapture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_subcapture.h ) + +if(SUPPORT_XEHP_PLUS) + list(APPEND NEO_CORE_AUB + ${CMAKE_CURRENT_SOURCE_DIR}/aub_helper_xehp_plus.inl + ) +endif() + set_property(GLOBAL PROPERTY NEO_CORE_AUB ${NEO_CORE_AUB}) add_subdirectories() diff --git a/shared/source/aub/aub_helper_xehp_plus.inl b/shared/source/aub/aub_helper_xehp_plus.inl new file mode 100644 index 0000000000..4ca3da15c0 --- /dev/null +++ b/shared/source/aub/aub_helper_xehp_plus.inl @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/aub/aub_helper_base.inl" + +namespace NEO { + +template +int AubHelperHw::getDataHintForPml4Entry() const { + if (localMemoryEnabled) { + return AubMemDump::DataTypeHintValues::TracePpgttLevel4; + } + return AubMemDump::DataTypeHintValues::TraceNotype; +} + +template +int AubHelperHw::getDataHintForPdpEntry() const { + if (localMemoryEnabled) { + return AubMemDump::DataTypeHintValues::TracePpgttLevel3; + } + return AubMemDump::DataTypeHintValues::TraceNotype; +} + +template +int AubHelperHw::getDataHintForPdEntry() const { + if (localMemoryEnabled) { + return AubMemDump::DataTypeHintValues::TracePpgttLevel2; + } + return AubMemDump::DataTypeHintValues::TraceNotype; +} + +template +int AubHelperHw::getDataHintForPtEntry() const { + if (localMemoryEnabled) { + return AubMemDump::DataTypeHintValues::TracePpgttLevel1; + } + return 
AubMemDump::DataTypeHintValues::TraceNotype; +} + +} // namespace NEO diff --git a/shared/source/aub_mem_dump/CMakeLists.txt b/shared/source/aub_mem_dump/CMakeLists.txt index 7c1fc59450..4ab7c33265 100644 --- a/shared/source/aub_mem_dump/CMakeLists.txt +++ b/shared/source/aub_mem_dump/CMakeLists.txt @@ -22,5 +22,11 @@ if(NOT DEFINED AUB_STREAM_PROJECT_NAME) ) endif() +if(SUPPORT_XEHP_PLUS) + list(APPEND NEO_CORE_AUB_MEM_DUMP + ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump_xehp_plus.inl + ) +endif() + set_property(GLOBAL PROPERTY NEO_CORE_AUB_MEM_DUMP ${NEO_CORE_AUB_MEM_DUMP}) add_subdirectories() diff --git a/shared/source/aub_mem_dump/aub_mem_dump_xehp_plus.inl b/shared/source/aub_mem_dump/aub_mem_dump_xehp_plus.inl new file mode 100644 index 0000000000..06a42912a7 --- /dev/null +++ b/shared/source/aub_mem_dump/aub_mem_dump_xehp_plus.inl @@ -0,0 +1,293 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/aub_mem_dump/aub_alloc_dump.inl" +#include "shared/source/aub_mem_dump/aub_mem_dump.inl" +#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/helpers/array_count.h" +#include "shared/source/helpers/completion_stamp.h" +#include "shared/source/helpers/hw_helper.h" + +#include "aub_mapper.h" +#include "config.h" +#include "reg_configs_common.h" + +namespace AubMemDump { + +enum { + device = deviceValue +}; + +// Instantiate these common template implementations. 
+template struct AubDump>; +template struct AubDump>; + +template struct AubPageTableHelper32>; +template struct AubPageTableHelper64>; +} // namespace AubMemDump + +namespace NEO { + +static const AubMemDump::LrcaHelperRcs rcs(0x002000); +static const AubMemDump::LrcaHelperBcs bcs(0x022000); +static const AubMemDump::LrcaHelperVcs vcs(0x1c0000); +static const AubMemDump::LrcaHelperVecs vecs(0x1c8000); +static const AubMemDump::LrcaHelperCcs ccs(0x1a000); +static const AubMemDump::LrcaHelperCcs ccs1(0x1c000); +static const AubMemDump::LrcaHelperCcs ccs2(0x1e000); +static const AubMemDump::LrcaHelperCcs ccs3(0x26000); + +const AubMemDump::LrcaHelper *const AUBFamilyMapper::csTraits[aub_stream::NUM_ENGINES] = { + &rcs, + &bcs, + &vcs, + &vecs, + &ccs, + &ccs1, + &ccs2, + &ccs3}; + +const MMIOList AUBFamilyMapper::globalMMIO = { + // GLOBAL_MOCS + MMIOPair(0x00004000, 0x00000008), + MMIOPair(0x00004004, 0x00000038), + MMIOPair(0x00004008, 0x00000038), + MMIOPair(0x0000400C, 0x00000008), + MMIOPair(0x00004010, 0x00000018), + MMIOPair(0x00004014, 0x00060038), + MMIOPair(0x00004018, 0x00000000), + MMIOPair(0x0000401C, 0x00000033), + MMIOPair(0x00004020, 0x00060037), + MMIOPair(0x00004024, 0x0000003B), + MMIOPair(0x00004028, 0x00000032), + MMIOPair(0x0000402C, 0x00000036), + MMIOPair(0x00004030, 0x0000003A), + MMIOPair(0x00004034, 0x00000033), + MMIOPair(0x00004038, 0x00000037), + MMIOPair(0x0000403C, 0x0000003B), + MMIOPair(0x00004040, 0x00000030), + MMIOPair(0x00004044, 0x00000034), + MMIOPair(0x00004048, 0x00000038), + MMIOPair(0x0000404C, 0x00000031), + MMIOPair(0x00004050, 0x00000032), + MMIOPair(0x00004054, 0x00000036), + MMIOPair(0x00004058, 0x0000003A), + MMIOPair(0x0000405C, 0x00000033), + MMIOPair(0x00004060, 0x00000037), + MMIOPair(0x00004064, 0x0000003B), + MMIOPair(0x00004068, 0x00000032), + MMIOPair(0x0000406C, 0x00000036), + MMIOPair(0x00004070, 0x0000003A), + MMIOPair(0x00004074, 0x00000033), + MMIOPair(0x00004078, 0x00000037), + MMIOPair(0x0000407C, 
0x0000003B), + MMIOPair(0x00004080, 0x00000030), + MMIOPair(0x00004084, 0x00000034), + MMIOPair(0x00004088, 0x00000038), + MMIOPair(0x0000408C, 0x00000031), + MMIOPair(0x00004090, 0x00000032), + MMIOPair(0x00004094, 0x00000036), + MMIOPair(0x00004098, 0x0000003A), + MMIOPair(0x0000409C, 0x00000033), + MMIOPair(0x000040A0, 0x00000037), + MMIOPair(0x000040A4, 0x0000003B), + MMIOPair(0x000040A8, 0x00000032), + MMIOPair(0x000040AC, 0x00000036), + MMIOPair(0x000040B0, 0x0000003A), + MMIOPair(0x000040B4, 0x00000033), + MMIOPair(0x000040B8, 0x00000037), + MMIOPair(0x000040BC, 0x0000003B), + MMIOPair(0x000040C0, 0x00000038), + MMIOPair(0x000040C4, 0x00000034), + MMIOPair(0x000040C8, 0x00000038), + MMIOPair(0x000040CC, 0x00000031), + MMIOPair(0x000040D0, 0x00000032), + MMIOPair(0x000040D4, 0x00000036), + MMIOPair(0x000040D8, 0x0000003A), + MMIOPair(0x000040DC, 0x00000033), + MMIOPair(0x000040E0, 0x00000037), + MMIOPair(0x000040E4, 0x0000003B), + MMIOPair(0x000040E8, 0x00000032), + MMIOPair(0x000040EC, 0x00000036), + MMIOPair(0x000040F0, 0x00000038), + MMIOPair(0x000040F4, 0x00000038), + MMIOPair(0x000040F8, 0x00000038), + MMIOPair(0x000040FC, 0x00000038), + + // LNCF_MOCS + MMIOPair(0x0000B020, 0x00300010), + MMIOPair(0x0000B024, 0x00300010), + MMIOPair(0x0000B028, 0x00300030), + MMIOPair(0x0000B02C, 0x00000000), + MMIOPair(0x0000B030, 0x0030001F), + MMIOPair(0x0000B034, 0x00170013), + MMIOPair(0x0000B038, 0x0000001F), + MMIOPair(0x0000B03C, 0x00000000), + MMIOPair(0x0000B040, 0x00100000), + MMIOPair(0x0000B044, 0x00170013), + MMIOPair(0x0000B048, 0x0010001F), + MMIOPair(0x0000B04C, 0x00170013), + MMIOPair(0x0000B050, 0x0030001F), + MMIOPair(0x0000B054, 0x00170013), + MMIOPair(0x0000B058, 0x0000001F), + MMIOPair(0x0000B05C, 0x00000000), + MMIOPair(0x0000B060, 0x00100000), + MMIOPair(0x0000B064, 0x00170013), + MMIOPair(0x0000B068, 0x0010001F), + MMIOPair(0x0000B06C, 0x00170013), + MMIOPair(0x0000B070, 0x0030001F), + MMIOPair(0x0000B074, 0x00170013), + MMIOPair(0x0000B078, 
0x0000001F), + MMIOPair(0x0000B07C, 0x00000000), + MMIOPair(0x0000B080, 0x00300030), + MMIOPair(0x0000B084, 0x00170013), + MMIOPair(0x0000B088, 0x0010001F), + MMIOPair(0x0000B08C, 0x00170013), + MMIOPair(0x0000B090, 0x0030001F), + MMIOPair(0x0000B094, 0x00170013), + MMIOPair(0x0000B098, 0x00300010), + MMIOPair(0x0000B09C, 0x00300010), + + //PAT_INDEX + MMIOPair(0x00004100, 0x0000000), + MMIOPair(0x00004104, 0x0000000), + MMIOPair(0x00004108, 0x0000000), + MMIOPair(0x0000410c, 0x0000000), + MMIOPair(0x00004110, 0x0000000), + MMIOPair(0x00004114, 0x0000000), + MMIOPair(0x00004118, 0x0000000), + MMIOPair(0x0000411c, 0x0000000), + + MMIOPair(0x00004b80, 0xffff1001), //GACB_PERF_CTRL_REG + MMIOPair(0x00007000, 0xffff0000), //CACHE_MODE_0 + MMIOPair(0x00007004, 0xffff0000), //CACHE_MODE_1 + MMIOPair(0x000043F8, 0x00000000), //Gen12 (A-step) chicken bit for AuxT granularity + MMIOPair(0x00009008, 0x00000200), //IDICR + MMIOPair(0x0000900c, 0x00001b40), //SNPCR + MMIOPair(0x0000b120, 0x14000002), //LTCDREG + MMIOPair(0x00042080, 0x00000000), //CHICKEN_MISC_1 + MMIOPair(0x000020D4, 0xFFFF0000), //Chicken bit for CSFE + MMIOPair(0x0000B0A0, 0x00000000), //SCRATCH 2 for LNCF unit + MMIOPair(0x000094D4, 0x00000000), //Slice unit Level Clock Gating Control + + // Capture Perf MMIO register programming + MMIOPair(0x0000B004, 0x2FC0100B), //KM_ARBITER_CTRL_REG + MMIOPair(0x0000B404, 0x00000160), //KM_GLOBAL_INVALIDATION_REG + MMIOPair(0x00008708, 0x00000000), //KM_GEN12_IDI_CONTROL_REGISTER + + // Tiled Resources VA Translation Table L3 Pointer + MMIOPair(0x00004410, 0xffffffff), //GEN12_TRTT_NULL_TILE_REG + MMIOPair(0x00004414, 0xfffffffe), //GEN12_TRTT_INVD_TILE_REG + MMIOPair(0x00004404, 0x000000ff), //GEN12_TRTT_VA_MASKDATA_REG + MMIOPair(0x00004408, 0x00000000), //LDWORD GMM_GEN12_TRTT_L3_POINTER + MMIOPair(0x0000440C, 0x00000000), //UDWORD GMM_GEN12_TRTT_L3_POINTER + MMIOPair(0x00004400, 0x00000001), //GEN12_TRTT_TABLE_CONTROL + MMIOPair(0x00004DFC, 0x00000000), 
//GEN9_TR_CHICKEN_BIT_VECTOR +}; + +static const MMIOList mmioListRCS = { + MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x00002058), 0x00000000), //CTX_WA_PTR_RCSUNIT + MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000020a8), 0x00000000), //IMR + MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x0000229c), 0xffff8280), //GFX_MODE + + MMIOPair(0x00002090, 0xffff0000), //CHICKEN_PWR_CTX_RASTER_1 + MMIOPair(0x000020e0, 0xffff4000), //FF_SLICE_CS_CHICKEN1_RCSUNIT + MMIOPair(0x000020e4, 0xffff0000), //FF_SLICE_CS_CHICKEN2_RCSUNIT + MMIOPair(0x000020ec, 0xffff0051), //CS_DEBUG_MODE1 + + // FORCE_TO_NONPRIV + MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024d0), 0x00007014), + MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024d4), 0x0000e000), + MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024d8), 0x0000e000), + MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024dc), 0x0000e000), + MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024e0), 0x0000e000), + MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024e4), 0x0000e000), + MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024e8), 0x0000e000), + MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024ec), 0x0000e000), + MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024f0), 0x0000e000), + MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024f4), 0x0000e000), + MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024f8), 0x0000e000), + MMIOPair(AubMemDump::computeRegisterOffset(rcs.mmioBase, 0x000024fc), 0x0000e000), + + MMIOPair(0x00002580, 0xffff0005), //CS_CHICKEN1 + MMIOPair(0x0000e194, 0xffff0002), //CHICKEN_SAMPLER_2 + + MMIOPair(0x0000B134, 0xA0000000) //L3ALLOCREG +}; + +static const MMIOList mmioListBCS = { + MMIOPair(AubMemDump::computeRegisterOffset(bcs.mmioBase, 0x0000229c), 0xffff8280), //GFX_MODE +}; + +static const MMIOList 
mmioListVCS = { + MMIOPair(AubMemDump::computeRegisterOffset(vcs.mmioBase, 0x0000229c), 0xffff8280), //GFX_MODE +}; + +static const MMIOList mmioListVECS = { + MMIOPair(AubMemDump::computeRegisterOffset(vecs.mmioBase, 0x0000229c), 0xffff8280), //GFX_MODE +}; + +static MMIOList mmioListCCSInstance(uint32_t mmioBase) { + MMIOList mmioList; + + mmioList.push_back(MMIOPair(0x0000ce90, 0x00030003)); //GFX_MULT_CTXT_CTL - enable multi-context with 4CCS + mmioList.push_back(MMIOPair(0x0000b170, 0x00030003)); //MULT_CTXT_CTL - enable multi-context with 4CCS + mmioList.push_back(MMIOPair(0x00014800, 0xFFFF0001)); //RCU_MODE + mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x0000229c), 0xffff8280)); //GFX_MODE + + // FORCE_TO_NONPRIV + mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024d0), 0x0000e000)); + mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024d4), 0x0000e000)); + mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024d8), 0x0000e000)); + mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024dc), 0x0000e000)); + mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024e0), 0x0000e000)); + mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024e4), 0x0000e000)); + mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024e8), 0x0000e000)); + mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024ec), 0x0000e000)); + mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024f0), 0x0000e000)); + mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024f4), 0x0000e000)); + mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024f8), 0x0000e000)); + mmioList.push_back(MMIOPair(AubMemDump::computeRegisterOffset(mmioBase, 0x000024fc), 0x0000e000)); + + 
mmioList.push_back(MMIOPair(0x0000B234, 0xA0000000)); //L3ALLOCREG_CCS0 + + return mmioList; +}; + +static const MMIOList mmioListCCS = mmioListCCSInstance(ccs.mmioBase); +static const MMIOList mmioListCCS1 = mmioListCCSInstance(ccs1.mmioBase); +static const MMIOList mmioListCCS2 = mmioListCCSInstance(ccs2.mmioBase); +static const MMIOList mmioListCCS3 = mmioListCCSInstance(ccs3.mmioBase); + +const MMIOList *AUBFamilyMapper::perEngineMMIO[aub_stream::NUM_ENGINES] = { + &mmioListRCS, + &mmioListBCS, + &mmioListVCS, + &mmioListVECS, + &mmioListCCS, + &mmioListCCS1, + &mmioListCCS2, + &mmioListCCS3}; +} // namespace NEO + +namespace AubAllocDump { +using namespace NEO; + +template SurfaceInfo *getDumpSurfaceInfo(GraphicsAllocation &gfxAllocation, DumpFormat dumpFormat); + +template uint32_t getImageSurfaceTypeFromGmmResourceType(GMM_RESOURCE_TYPE gmmResourceType); + +template void dumpBufferInBinFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); + +template void dumpImageInBmpFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); + +template void dumpBufferInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); + +template void dumpImageInTreFormat(GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); + +template void dumpAllocation(DumpFormat dumpFormat, GraphicsAllocation &gfxAllocation, AubMemDump::AubFileStream *stream, uint32_t context); +} // namespace AubAllocDump diff --git a/shared/source/aub_mem_dump/definitions/aub_services.h b/shared/source/aub_mem_dump/definitions/aub_services.h index 24daf4a39f..794c959fa8 100644 --- a/shared/source/aub_mem_dump/definitions/aub_services.h +++ b/shared/source/aub_mem_dump/definitions/aub_services.h @@ -102,7 +102,8 @@ struct CmdServicesMemTraceVersion { Lkf = 25, Ehl = 28, Dg1 = 30, - Adls = 37 + Adls = 37, + XeHP_SDV = 29, }; }; struct RecordingMethodValues 
{ @@ -777,7 +778,11 @@ struct CmdServicesMemTraceMemoryWrite { TraceKernelInstructions = 26, TraceVolumeMap = 9, TraceCubeMap = 7, - TraceLogicalRingContextBcs = 49 + TraceLogicalRingContextBcs = 49, + TracePpgttLevel1 = 65, + TracePpgttLevel2 = 66, + TracePpgttLevel3 = 67, + TracePpgttLevel4 = 68 }; }; struct TilingValues { diff --git a/shared/source/command_container/CMakeLists.txt b/shared/source/command_container/CMakeLists.txt index 7b4671cb07..a8a1934887 100644 --- a/shared/source/command_container/CMakeLists.txt +++ b/shared/source/command_container/CMakeLists.txt @@ -13,8 +13,18 @@ set(NEO_CORE_COMMAND_CONTAINER ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_bdw_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}/encode_compute_mode_bdw_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}/encode_compute_mode_tgllp_plus.inl + ${CMAKE_CURRENT_SOURCE_DIR}/implicit_scaling.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/implicit_scaling.h ) +if(SUPPORT_XEHP_PLUS) + list(APPEND NEO_CORE_COMMAND_CONTAINER + ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_xehp_plus.inl + ${CMAKE_CURRENT_SOURCE_DIR}/implicit_scaling_xehp_plus.inl + ${CMAKE_CURRENT_SOURCE_DIR}/walker_partition_xehp_plus.h + ) +endif() + set_property(GLOBAL PROPERTY NEO_CORE_COMMAND_CONTAINER ${NEO_CORE_COMMAND_CONTAINER}) add_subdirectories() diff --git a/shared/source/command_container/command_encoder_xehp_plus.inl b/shared/source/command_container/command_encoder_xehp_plus.inl new file mode 100644 index 0000000000..b81e47d39d --- /dev/null +++ b/shared/source/command_container/command_encoder_xehp_plus.inl @@ -0,0 +1,649 @@ +/* + * Copyright (C) 2020-2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/command_container/command_encoder.h" +#include "shared/source/command_container/implicit_scaling.h" +#include "shared/source/command_stream/command_stream_receiver.h" +#include "shared/source/command_stream/linear_stream.h" +#include "shared/source/command_stream/preemption.h" +#include 
"shared/source/command_stream/stream_properties.h" +#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/execution_environment/execution_environment.h" +#include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/source/helpers/basic_math.h" +#include "shared/source/helpers/constants.h" +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/helpers/pipeline_select_helper.h" +#include "shared/source/helpers/simd_helper.h" +#include "shared/source/helpers/state_base_address.h" +#include "shared/source/kernel/dispatch_kernel_encoder_interface.h" +#include "shared/source/kernel/kernel_descriptor.h" + +#include "gmm_client_context.h" +#include "pipe_control_args.h" + +#include + +namespace NEO { +constexpr size_t TimestampDestinationAddressAlignment = 16; + +template +void EncodeDispatchKernel::encode(CommandContainer &container, + const void *pThreadGroupDimensions, bool isIndirect, bool isPredicate, DispatchKernelEncoderI *dispatchInterface, + uint64_t eventAddress, bool isTimestampEvent, bool L3FlushEnable, Device *device, PreemptionMode preemptionMode, + bool &requiresUncachedMocs, bool useGlobalAtomics, uint32_t &partitionCount, bool isInternal) { + using SHARED_LOCAL_MEMORY_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE; + using STATE_BASE_ADDRESS = typename Family::STATE_BASE_ADDRESS; + using MI_BATCH_BUFFER_END = typename Family::MI_BATCH_BUFFER_END; + using INLINE_DATA = typename Family::INLINE_DATA; + + const HardwareInfo &hwInfo = device->getHardwareInfo(); + + const auto &kernelDescriptor = dispatchInterface->getKernelDescriptor(); + auto sizeCrossThreadData = dispatchInterface->getCrossThreadDataSize(); + auto sizePerThreadDataForWholeGroup = dispatchInterface->getPerThreadDataSizeForWholeThreadGroup(); + + LinearStream *listCmdBufferStream = container.getCommandStream(); + size_t sshOffset = 0; + + auto threadDims = static_cast(pThreadGroupDimensions); + const Vec3 
threadStartVec{0, 0, 0}; + Vec3 threadDimsVec{0, 0, 0}; + if (!isIndirect) { + threadDimsVec = {threadDims[0], threadDims[1], threadDims[2]}; + } + size_t estimatedSizeRequired = estimateEncodeDispatchKernelCmdsSize(device, threadStartVec, threadDimsVec, isInternal); + if (container.getCommandStream()->getAvailableSpace() < estimatedSizeRequired) { + auto bbEnd = listCmdBufferStream->getSpaceForCmd(); + *bbEnd = Family::cmdInitBatchBufferEnd; + + container.allocateNextCommandBuffer(); + } + + if (kernelDescriptor.extendedInfo) { + bool specialModeRequired = kernelDescriptor.extendedInfo->specialPipelineSelectModeRequired(); + if (container.lastPipelineSelectModeRequired != specialModeRequired) { + container.lastPipelineSelectModeRequired = specialModeRequired; + EncodeComputeMode::adjustPipelineSelect(container, kernelDescriptor); + } + } + + WALKER_TYPE walkerCmd = Family::cmdInitGpgpuWalker; + auto &idd = walkerCmd.getInterfaceDescriptor(); + + bool localIdsGenerationByRuntime = dispatchInterface->requiresGenerationOfLocalIdsByRuntime(); + bool inlineDataProgramming = EncodeDispatchKernel::inlineDataProgrammingRequired(kernelDescriptor); + { + auto alloc = dispatchInterface->getIsaAllocation(); + UNRECOVERABLE_IF(nullptr == alloc); + auto offset = alloc->getGpuAddressToPatch(); + if (!localIdsGenerationByRuntime) { + offset += kernelDescriptor.entryPoints.skipPerThreadDataLoad; + } + idd.setKernelStartPointer(offset); + idd.setKernelStartPointerHigh(0u); + } + + auto threadsPerThreadGroup = dispatchInterface->getNumThreadsPerThreadGroup(); + idd.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup); + + EncodeDispatchKernel::programBarrierEnable(idd, + kernelDescriptor.kernelAttributes.barrierCount, + hwInfo); + + auto slmSize = static_cast( + HwHelperHw::get().computeSlmValues(hwInfo, dispatchInterface->getSlmTotalSize())); + + if (DebugManager.flags.OverrideSlmAllocationSize.get() != -1) { + slmSize = 
static_cast(DebugManager.flags.OverrideSlmAllocationSize.get()); + } + idd.setSharedLocalMemorySize(slmSize); + + auto bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries; + uint32_t bindingTablePointer = 0u; + if (kernelDescriptor.kernelAttributes.bufferAddressingMode == KernelDescriptor::BindfulAndStateless) { + container.prepareBindfulSsh(); + if (bindingTableStateCount > 0u) { + auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); + sshOffset = ssh->getUsed(); + bindingTablePointer = static_cast(EncodeSurfaceState::pushBindingTableAndSurfaceStates( + *ssh, bindingTableStateCount, + dispatchInterface->getSurfaceStateHeapData(), + dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount, + kernelDescriptor.payloadMappings.bindingTable.tableOffset)); + } + } + idd.setBindingTablePointer(bindingTablePointer); + + PreemptionHelper::programInterfaceDescriptorDataPreemption(&idd, preemptionMode); + + auto heap = ApiSpecificConfig::getBindlessConfiguration() ? 
device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE); + UNRECOVERABLE_IF(!heap); + + uint32_t samplerStateOffset = 0; + uint32_t samplerCount = 0; + + if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0) { + samplerCount = kernelDescriptor.payloadMappings.samplerTable.numSamplers; + samplerStateOffset = EncodeStates::copySamplerState( + heap, kernelDescriptor.payloadMappings.samplerTable.tableOffset, + kernelDescriptor.payloadMappings.samplerTable.numSamplers, kernelDescriptor.payloadMappings.samplerTable.borderColor, + dispatchInterface->getDynamicStateHeapData(), + device->getBindlessHeapsHelper()); + if (ApiSpecificConfig::getBindlessConfiguration()) { + container.getResidencyContainer().push_back(device->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH)->getGraphicsAllocation()); + } + } + + idd.setSamplerStatePointer(samplerStateOffset); + + EncodeDispatchKernel::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount); + + uint64_t offsetThreadData = 0u; + const uint32_t inlineDataSize = sizeof(INLINE_DATA); + auto crossThreadData = dispatchInterface->getCrossThreadData(); + + if (inlineDataProgramming) { + auto copySize = std::min(inlineDataSize, sizeCrossThreadData); + auto dest = reinterpret_cast(walkerCmd.getInlineDataPointer()); + memcpy_s(dest, copySize, crossThreadData, copySize); + auto offset = std::min(inlineDataSize, sizeCrossThreadData); + sizeCrossThreadData -= copySize; + crossThreadData = ptrOffset(crossThreadData, offset); + inlineDataProgramming = copySize != 0; + } + + uint32_t sizeThreadData = sizePerThreadDataForWholeGroup + sizeCrossThreadData; + { + auto heap = container.getIndirectHeap(HeapType::INDIRECT_OBJECT); + UNRECOVERABLE_IF(!heap); + heap->align(WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); + + auto ptr = container.getHeapSpaceAllowGrow(HeapType::INDIRECT_OBJECT, sizeThreadData); + 
UNRECOVERABLE_IF(!ptr); + offsetThreadData = (is64bit ? heap->getHeapGpuStartOffset() : heap->getHeapGpuBase()) + static_cast(heap->getUsed() - sizeThreadData); + + if (sizeCrossThreadData > 0) { + memcpy_s(ptr, sizeCrossThreadData, + crossThreadData, sizeCrossThreadData); + } + if (isIndirect) { + void *gpuPtr = reinterpret_cast(heap->getHeapGpuBase() + heap->getUsed() - sizeThreadData); + EncodeIndirectParams::setGroupCountIndirect(container, kernelDescriptor.payloadMappings.dispatchTraits.numWorkGroups, gpuPtr); + EncodeIndirectParams::setGlobalWorkSizeIndirect(container, kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize, gpuPtr, dispatchInterface->getGroupSize()); + } + + auto perThreadDataPtr = dispatchInterface->getPerThreadData(); + if (perThreadDataPtr != nullptr) { + ptr = ptrOffset(ptr, sizeCrossThreadData); + memcpy_s(ptr, sizePerThreadDataForWholeGroup, + perThreadDataPtr, sizePerThreadDataForWholeGroup); + } + } + + bool requiresGlobalAtomicsUpdate = false; + if (ImplicitScalingHelper::isImplicitScalingEnabled(container.getDevice()->getDeviceBitfield(), true)) { + requiresGlobalAtomicsUpdate = container.lastSentUseGlobalAtomics != useGlobalAtomics; + container.lastSentUseGlobalAtomics = useGlobalAtomics; + } + + if (container.isAnyHeapDirty() || requiresUncachedMocs || requiresGlobalAtomicsUpdate) { + PipeControlArgs args(true); + MemorySynchronizationCommands::addPipeControl(*container.getCommandStream(), args); + STATE_BASE_ADDRESS sbaCmd; + auto gmmHelper = container.getDevice()->getGmmHelper(); + uint32_t statelessMocsIndex = + requiresUncachedMocs ? 
(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1) : (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); + EncodeStateBaseAddress::encode(container, sbaCmd, statelessMocsIndex, useGlobalAtomics); + container.setDirtyStateForAllHeaps(false); + requiresUncachedMocs = false; + } + + walkerCmd.setIndirectDataStartAddress(static_cast(offsetThreadData)); + walkerCmd.setIndirectDataLength(sizeThreadData); + + EncodeDispatchKernel::encodeThreadData(walkerCmd, + nullptr, + threadDims, + dispatchInterface->getGroupSize(), + kernelDescriptor.kernelAttributes.simdSize, + kernelDescriptor.kernelAttributes.numLocalIdChannels, + dispatchInterface->getNumThreadsPerThreadGroup(), + dispatchInterface->getThreadExecutionMask(), + localIdsGenerationByRuntime, + inlineDataProgramming, + isIndirect, + dispatchInterface->getRequiredWorkgroupOrder()); + + using POSTSYNC_DATA = typename Family::POSTSYNC_DATA; + auto &postSync = walkerCmd.getPostSync(); + if (eventAddress != 0) { + postSync.setDataportPipelineFlush(true); + postSync.setL3Flush(L3FlushEnable); + if (isTimestampEvent) { + postSync.setOperation(POSTSYNC_DATA::OPERATION_WRITE_TIMESTAMP); + } else { + uint32_t STATE_SIGNALED = 0u; + postSync.setOperation(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA); + postSync.setImmediateData(STATE_SIGNALED); + } + UNRECOVERABLE_IF(!(isAligned(eventAddress))); + postSync.setDestinationAddress(eventAddress); + + auto gmmHelper = device->getRootDeviceEnvironment().getGmmHelper(); + postSync.setMocs(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED)); + + EncodeDispatchKernel::adjustTimestampPacket(walkerCmd, hwInfo); + } + + walkerCmd.setPredicateEnable(isPredicate); + + EncodeDispatchKernel::adjustInterfaceDescriptorData(idd, hwInfo); + + EncodeDispatchKernel::appendAdditionalIDDFields(&idd, hwInfo, threadsPerThreadGroup, + dispatchInterface->getSlmTotalSize(), + dispatchInterface->getSlmPolicy()); + + 
EncodeDispatchKernel::encodeAdditionalWalkerFields(hwInfo, walkerCmd); + + PreemptionHelper::applyPreemptionWaCmdsBegin(listCmdBufferStream, *device); + + if (ImplicitScalingHelper::isImplicitScalingEnabled(device->getDeviceBitfield(), true) && + !isInternal) { + const uint64_t workPartitionAllocationGpuVa = device->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress(); + ImplicitScalingDispatch::dispatchCommands(*listCmdBufferStream, + walkerCmd, + device->getDeviceBitfield(), + partitionCount, + true, + true, + false, + workPartitionAllocationGpuVa); + } else { + partitionCount = 1; + auto buffer = listCmdBufferStream->getSpace(sizeof(walkerCmd)); + *(decltype(walkerCmd) *)buffer = walkerCmd; + } + + PreemptionHelper::applyPreemptionWaCmdsEnd(listCmdBufferStream, *device); +} + +template +inline void EncodeDispatchKernel::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd) { +} + +template +bool EncodeDispatchKernel::isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels, + size_t *lws, + std::array walkOrder, + bool requireInputWalkOrder, + uint32_t &requiredWalkOrder, + uint32_t simd) { + if (simd == 1) { + return true; + } + bool hwGenerationOfLocalIdsEnabled = true; + if (DebugManager.flags.EnableHwGenerationLocalIds.get() != -1) { + hwGenerationOfLocalIdsEnabled = !!DebugManager.flags.EnableHwGenerationLocalIds.get(); + } + if (hwGenerationOfLocalIdsEnabled) { + if (activeChannels == 0) { + return false; + } + + size_t totalLwsSize = 1u; + for (auto dimension = 0u; dimension < activeChannels; dimension++) { + totalLwsSize *= lws[dimension]; + } + + if (totalLwsSize > 1024u) { + return true; + } + + //make sure table below matches Hardware Spec + constexpr uint32_t walkOrderPossibilties = 6u; + constexpr uint8_t possibleWalkOrders[walkOrderPossibilties][3] = {{0, 1, 2}, + {0, 2, 1}, + {1, 0, 2}, + {2, 0, 1}, + {1, 2, 0}, + {2, 1, 0}}; + + //check if we need to follow kernel requirements + if 
(requireInputWalkOrder) { + for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) { + if (!Math::isPow2(lws[walkOrder[dimension]])) { + return true; + } + } + + auto index = 0u; + while (index < walkOrderPossibilties) { + if (walkOrder[0] == possibleWalkOrders[index][0] && + walkOrder[1] == possibleWalkOrders[index][1]) { + break; + }; + index++; + } + DEBUG_BREAK_IF(index >= walkOrderPossibilties); + + requiredWalkOrder = index; + return false; + } + + //kernel doesn't specify any walk order requirements, check if we have any compatible + for (uint32_t walkOrder = 0; walkOrder < walkOrderPossibilties; walkOrder++) { + bool allDimensionsCompatible = true; + for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) { + if (!Math::isPow2(lws[possibleWalkOrders[walkOrder][dimension]])) { + allDimensionsCompatible = false; + break; + } + } + if (allDimensionsCompatible) { + requiredWalkOrder = walkOrder; + return false; + } + } + } + return true; +} + +template +void EncodeDispatchKernel::encodeThreadData(WALKER_TYPE &walkerCmd, + const uint32_t *startWorkGroup, + const uint32_t *numWorkGroups, + const uint32_t *workGroupSizes, + uint32_t simd, + uint32_t localIdDimensions, + uint32_t threadsPerThreadGroup, + uint32_t threadExecutionMask, + bool localIdsGenerationByRuntime, + bool inlineDataProgrammingRequired, + bool isIndirect, + uint32_t requiredWorkGroupOrder) { + + if (isIndirect) { + walkerCmd.setIndirectParameterEnable(true); + } else { + walkerCmd.setThreadGroupIdXDimension(static_cast(numWorkGroups[0])); + walkerCmd.setThreadGroupIdYDimension(static_cast(numWorkGroups[1])); + walkerCmd.setThreadGroupIdZDimension(static_cast(numWorkGroups[2])); + } + + if (startWorkGroup) { + walkerCmd.setThreadGroupIdStartingX(static_cast(startWorkGroup[0])); + walkerCmd.setThreadGroupIdStartingY(static_cast(startWorkGroup[1])); + walkerCmd.setThreadGroupIdStartingZ(static_cast(startWorkGroup[2])); + } + + uint64_t executionMask = 
threadExecutionMask; + if (executionMask == 0) { + auto workGroupSize = workGroupSizes[0] * workGroupSizes[1] * workGroupSizes[2]; + auto remainderSimdLanes = workGroupSize & (simd - 1); + executionMask = maxNBitValue(remainderSimdLanes); + if (!executionMask) { + executionMask = maxNBitValue((simd == 1) ? 32 : simd); + } + } + + walkerCmd.setExecutionMask(static_cast(executionMask)); + walkerCmd.setSimdSize(getSimdConfig(simd)); + + walkerCmd.setMessageSimd(walkerCmd.getSimdSize()); + + //1) cross-thread inline data will be put into R1, but if kernel uses local ids, then cross-thread should be put further back + //so whenever local ids are driver or hw generated, reserve space by setting right values for emitLocalIds + //2) Auto-generation of local ids should be possible, when in fact local ids are used + if (!localIdsGenerationByRuntime && localIdDimensions > 0) { + UNRECOVERABLE_IF(localIdDimensions != 3); + uint32_t emitLocalIdsForDim = (1 << 0) | (1 << 1) | (1 << 2); + walkerCmd.setEmitLocalId(emitLocalIdsForDim); + + walkerCmd.setLocalXMaximum(static_cast(workGroupSizes[0] - 1)); + walkerCmd.setLocalYMaximum(static_cast(workGroupSizes[1] - 1)); + walkerCmd.setLocalZMaximum(static_cast(workGroupSizes[2] - 1)); + + walkerCmd.setGenerateLocalId(1); + walkerCmd.setWalkOrder(requiredWorkGroupOrder); + } + if (inlineDataProgrammingRequired == true) { + walkerCmd.setEmitInlineParameter(1); + } +} + +template +size_t EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize(Device *device, Vec3 groupStart, Vec3 groupCount, + bool isInternal) { + size_t totalSize = sizeof(WALKER_TYPE); + totalSize += PreemptionHelper::getPreemptionWaCsSize(*device); + totalSize += EncodeStates::getAdjustStateComputeModeSize(); + totalSize += EncodeIndirectParams::getCmdsSizeForIndirectParams(); + totalSize += EncodeIndirectParams::getCmdsSizeForSetGroupCountIndirect(); + totalSize += EncodeIndirectParams::getCmdsSizeForSetGroupSizeIndirect(); + if 
(ImplicitScalingHelper::isImplicitScalingEnabled(device->getDeviceBitfield(), true) && + !isInternal) { + const bool staticPartitioning = device->getDefaultEngine().commandStreamReceiver->isStaticWorkPartitioningEnabled(); + totalSize += ImplicitScalingDispatch::getSize(true, staticPartitioning, device->getDeviceBitfield(), groupStart, groupCount); + } + + return totalSize; +} + +template +void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd) { + auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper(); + uint32_t statelessMocsIndex = (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); + EncodeStateBaseAddress::encode(container, sbaCmd, statelessMocsIndex, false); +} + +template +void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics) { + auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper(); + bool multiOsContextCapable = + ImplicitScalingHelper::isImplicitScalingEnabled(container.getDevice()->getDeviceBitfield(), true); + + StateBaseAddressHelper::programStateBaseAddress( + &sbaCmd, + container.isHeapDirty(HeapType::DYNAMIC_STATE) ? container.getIndirectHeap(HeapType::DYNAMIC_STATE) : nullptr, + container.isHeapDirty(HeapType::INDIRECT_OBJECT) ? container.getIndirectHeap(HeapType::INDIRECT_OBJECT) : nullptr, + container.isHeapDirty(HeapType::SURFACE_STATE) ? 
container.getIndirectHeap(HeapType::SURFACE_STATE) : nullptr, + 0, + true, + statelessMocsIndex, + container.getIndirectObjectHeapBaseAddress(), + container.getInstructionHeapBaseAddress(), + 0, + true, + false, + gmmHelper, + multiOsContextCapable, + MemoryCompressionState::NotApplicable, + useGlobalAtomics, + 1u); + + auto pCmd = reinterpret_cast(container.getCommandStream()->getSpace(sizeof(STATE_BASE_ADDRESS))); + *pCmd = sbaCmd; + + if (container.isHeapDirty(HeapType::SURFACE_STATE)) { + auto heap = container.getIndirectHeap(HeapType::SURFACE_STATE); + auto cmd = Family::cmdInitStateBindingTablePoolAlloc; + cmd.setBindingTablePoolBaseAddress(heap->getHeapGpuBase()); + cmd.setBindingTablePoolBufferSize(heap->getHeapSizeInPages()); + cmd.setSurfaceObjectControlStateIndexToMocsTables(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER)); + + auto buffer = container.getCommandStream()->getSpace(sizeof(cmd)); + *(typename Family::_3DSTATE_BINDING_TABLE_POOL_ALLOC *)buffer = cmd; + } +} + +template +void EncodeComputeMode::adjustComputeMode(LinearStream &csr, void *const stateComputeModePtr, StateComputeModeProperties &properties) { + using STATE_COMPUTE_MODE = typename Family::STATE_COMPUTE_MODE; + using FORCE_NON_COHERENT = typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT; + + STATE_COMPUTE_MODE stateComputeMode = (stateComputeModePtr != nullptr) ? *(static_cast(stateComputeModePtr)) : Family::cmdInitStateComputeMode; + auto maskBits = stateComputeMode.getMaskBits(); + + if (properties.isCoherencyRequired.isDirty) { + FORCE_NON_COHERENT coherencyValue = !properties.isCoherencyRequired.value ? 
FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT + : FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_DISABLED; + stateComputeMode.setForceNonCoherent(coherencyValue); + maskBits |= Family::stateComputeModeForceNonCoherentMask; + } + + if (properties.largeGrfMode.isDirty) { + stateComputeMode.setLargeGrfMode(properties.largeGrfMode.value); + maskBits |= Family::stateComputeModeLargeGrfModeMask; + } + + stateComputeMode.setMaskBits(maskBits); + + auto buffer = csr.getSpaceForCmd(); + *buffer = stateComputeMode; +} + +template +void EncodeComputeMode::adjustPipelineSelect(CommandContainer &container, const NEO::KernelDescriptor &kernelDescriptor) { + using PIPELINE_SELECT = typename Family::PIPELINE_SELECT; + auto pipelineSelectCmd = Family::cmdInitPipelineSelect; + + if (kernelDescriptor.extendedInfo && kernelDescriptor.extendedInfo->specialPipelineSelectModeRequired()) { + pipelineSelectCmd.setSystolicModeEnable(true); + } else { + pipelineSelectCmd.setSystolicModeEnable(false); + } + + if (DebugManager.flags.OverrideSystolicPipelineSelect.get() != -1) { + pipelineSelectCmd.setSystolicModeEnable(DebugManager.flags.OverrideSystolicPipelineSelect.get()); + } + + pipelineSelectCmd.setMaskBits(pipelineSelectSystolicModeEnableMaskBits); + pipelineSelectCmd.setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU); + + auto buffer = container.getCommandStream()->getSpace(sizeof(pipelineSelectCmd)); + *(decltype(pipelineSelectCmd) *)buffer = pipelineSelectCmd; +} + +template +inline void EncodeMediaInterfaceDescriptorLoad::encode(CommandContainer &container) { +} + +template +void EncodeMiFlushDW::appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd) { + miFlushDwCmd->setFlushCcs(1); + miFlushDwCmd->setFlushLlc(1); +} + +template +void EncodeMiFlushDW::programMiFlushDwWA(LinearStream &commandStream) { + auto miFlushDwCmd = commandStream.getSpaceForCmd(); + *miFlushDwCmd = Family::cmdInitMiFlushDw; +} + +template +size_t EncodeMiFlushDW::getMiFlushDwWaSize() { + return 
sizeof(typename Family::MI_FLUSH_DW); +} + +template +bool EncodeSurfaceState::doBindingTablePrefetch() { + return false; +} + +template +void EncodeSurfaceState::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper, + bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) { + Gmm *gmm = allocation ? allocation->getDefaultGmm() : nullptr; + uint32_t compressionFormat = 0; + + bool setConstCachePolicy = false; + if (allocation && allocation->getAllocationType() == GraphicsAllocation::AllocationType::CONSTANT_SURFACE) { + setConstCachePolicy = true; + } + + if (surfaceState->getMemoryObjectControlState() == gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) && + DebugManager.flags.ForceL1Caching.get() != 0) { + setConstCachePolicy = true; + } + + if (setConstCachePolicy == true) { + surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST)); + } + + encodeExtraCacheSettings(surfaceState, *gmmHelper->getHardwareInfo()); + DeviceBitfield deviceBitfield{static_cast(maxNBitValue(numAvailableDevices))}; + bool implicitScaling = ImplicitScalingHelper::isImplicitScalingEnabled(deviceBitfield, true); + bool enablePartialWrites = implicitScaling; + bool enableMultiGpuAtomics = enablePartialWrites; + + if (DebugManager.flags.EnableMultiGpuAtomicsOptimization.get()) { + enableMultiGpuAtomics = useGlobalAtomics && (enablePartialWrites || areMultipleSubDevicesInContext); + } + + surfaceState->setDisableSupportForMultiGpuAtomics(!enableMultiGpuAtomics); + surfaceState->setDisableSupportForMultiGpuPartialWrites(!enablePartialWrites); + + if (DebugManager.flags.ForceMultiGpuAtomics.get() != -1) { + surfaceState->setDisableSupportForMultiGpuAtomics(!!DebugManager.flags.ForceMultiGpuAtomics.get()); + } + + if (DebugManager.flags.ForceMultiGpuPartialWrites.get() != -1) { + 
surfaceState->setDisableSupportForMultiGpuPartialWrites(!!DebugManager.flags.ForceMultiGpuPartialWrites.get()); + } + + if (EncodeSurfaceState::isAuxModeEnabled(surfaceState, gmm)) { + auto resourceFormat = gmm->gmmResourceInfo->getResourceFormat(); + compressionFormat = gmmHelper->getClientContext()->getSurfaceStateCompressionFormat(resourceFormat); + + if (DebugManager.flags.ForceBufferCompressionFormat.get() != -1) { + compressionFormat = DebugManager.flags.ForceBufferCompressionFormat.get(); + } + } + + if (DebugManager.flags.EnableStatelessCompressionWithUnifiedMemory.get()) { + if (allocation && !MemoryPool::isSystemMemoryPool(allocation->getMemoryPool())) { + setCoherencyType(surfaceState, R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT); + setBufferAuxParamsForCCS(surfaceState); + compressionFormat = DebugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get(); + } + } + + surfaceState->setCompressionFormat(compressionFormat); +} + +template +inline void EncodeSurfaceState::setCoherencyType(R_SURFACE_STATE *surfaceState, COHERENCY_TYPE coherencyType) { + surfaceState->setCoherencyType(R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT); +} + +template +void EncodeSempahore::programMiSemaphoreWait(MI_SEMAPHORE_WAIT *cmd, + uint64_t compareAddress, + uint32_t compareData, + COMPARE_OPERATION compareMode, + bool registerPollMode) { + MI_SEMAPHORE_WAIT localCmd = Family::cmdInitMiSemaphoreWait; + localCmd.setCompareOperation(compareMode); + localCmd.setSemaphoreDataDword(compareData); + localCmd.setSemaphoreGraphicsAddress(compareAddress); + localCmd.setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); + localCmd.setRegisterPollMode(registerPollMode ? 
MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_REGISTER_POLL : MI_SEMAPHORE_WAIT::REGISTER_POLL_MODE::REGISTER_POLL_MODE_MEMORY_POLL); + + *cmd = localCmd; +} + +template +inline void EncodeWA::encodeAdditionalPipelineSelect(Device &device, LinearStream &stream, bool is3DPipeline) {} + +template +inline size_t EncodeWA::getAdditionalPipelineSelectSize(Device &device) { + return 0u; +} + +} // namespace NEO diff --git a/shared/source/command_container/image_surface_state/CMakeLists.txt b/shared/source/command_container/image_surface_state/CMakeLists.txt index f442f3b56f..25946b5b03 100644 --- a/shared/source/command_container/image_surface_state/CMakeLists.txt +++ b/shared/source/command_container/image_surface_state/CMakeLists.txt @@ -8,6 +8,7 @@ set(NEO_CORE_COMMAND_CONTAINER_IMAGE_SURFACE_STATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/compression_params_bdw_plus.inl ${CMAKE_CURRENT_SOURCE_DIR}/compression_params_tgllp_plus.inl + ${CMAKE_CURRENT_SOURCE_DIR}/compression_params_xehp_plus.inl ) set_property(GLOBAL APPEND PROPERTY NEO_CORE_COMMAND_CONTAINER ${NEO_CORE_COMMAND_CONTAINER_IMAGE_SURFACE_STATE}) diff --git a/shared/source/command_container/image_surface_state/compression_params_xehp_plus.inl b/shared/source/command_container/image_surface_state/compression_params_xehp_plus.inl new file mode 100644 index 0000000000..2368a00d0e --- /dev/null +++ b/shared/source/command_container/image_surface_state/compression_params_xehp_plus.inl @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/gmm_helper/resource_info.h" + +#include "gmm_client_context.h" + +namespace NEO { + +template +void EncodeSurfaceState::appendImageCompressionParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool imageFromBuffer) { + const auto ccsMode = R_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E; + if 
((ccsMode == surfaceState->getAuxiliarySurfaceMode() || surfaceState->getMemoryCompressionEnable())) { + uint8_t compressionFormat; + auto gmmResourceInfo = allocation->getDefaultGmm()->gmmResourceInfo.get(); + if (gmmResourceInfo->getResourceFlags()->Info.MediaCompressed) { + compressionFormat = gmmHelper->getClientContext()->getMediaSurfaceStateCompressionFormat(gmmResourceInfo->getResourceFormat()); + } else { + compressionFormat = gmmHelper->getClientContext()->getSurfaceStateCompressionFormat(gmmResourceInfo->getResourceFormat()); + } + + if (imageFromBuffer) { + if (DebugManager.flags.ForceBufferCompressionFormat.get() != -1) { + compressionFormat = DebugManager.flags.ForceBufferCompressionFormat.get(); + } + appendParamsForImageFromBuffer(surfaceState); + } + + surfaceState->setCompressionFormat(compressionFormat); + } +} +} // namespace NEO diff --git a/shared/source/command_container/implicit_scaling.cpp b/shared/source/command_container/implicit_scaling.cpp new file mode 100644 index 0000000000..eb963a525e --- /dev/null +++ b/shared/source/command_container/implicit_scaling.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_container/implicit_scaling.h" + +#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/os_interface/os_interface.h" + +namespace NEO { + +bool ImplicitScalingHelper::isImplicitScalingEnabled(const DeviceBitfield &devices, bool preCondition) { + bool partitionWalker = (devices.count() > 1u) && + preCondition && + ImplicitScaling::apiSupport; + + if (DebugManager.flags.EnableWalkerPartition.get() != -1) { + partitionWalker = !!DebugManager.flags.EnableWalkerPartition.get(); + } + //we can't do this without local memory + partitionWalker &= OSInterface::osEnableLocalMemory; + + return partitionWalker; +} + +bool ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired() { + auto synchronizeBeforeExecution 
= false; + if (DebugManager.flags.SynchronizeWalkerInWparidMode.get() != -1) { + synchronizeBeforeExecution = static_cast(DebugManager.flags.SynchronizeWalkerInWparidMode.get()); + } + return synchronizeBeforeExecution; +} + +} // namespace NEO diff --git a/shared/source/command_container/implicit_scaling.h b/shared/source/command_container/implicit_scaling.h new file mode 100644 index 0000000000..2f48914cd9 --- /dev/null +++ b/shared/source/command_container/implicit_scaling.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "shared/source/helpers/common_types.h" +#include "shared/source/helpers/vec.h" + +namespace NEO { +class LinearStream; + +namespace ImplicitScaling { +extern bool apiSupport; +} + +struct ImplicitScalingHelper { + static bool isImplicitScalingEnabled(const DeviceBitfield &devices, bool preCondition); + static bool isSynchronizeBeforeExecutionRequired(); +}; + +template +struct ImplicitScalingDispatch { + using WALKER_TYPE = typename GfxFamily::WALKER_TYPE; + + static size_t getSize(bool nativeCrossTileAtomicSync, + bool preferStaticPartitioning, + const DeviceBitfield &devices, + Vec3 groupStart, + Vec3 groupCount); + static void dispatchCommands(LinearStream &commandStream, + WALKER_TYPE &walkerCmd, + const DeviceBitfield &devices, + uint32_t &partitionCount, + bool useSecondaryBatchBuffer, + bool nativeCrossTileAtomicSync, + bool usesImages, + uint64_t workPartitionAllocationGpuVa); +}; + +} // namespace NEO diff --git a/shared/source/command_container/implicit_scaling_xehp_plus.inl b/shared/source/command_container/implicit_scaling_xehp_plus.inl new file mode 100644 index 0000000000..cb9ef87f8a --- /dev/null +++ b/shared/source/command_container/implicit_scaling_xehp_plus.inl @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_container/implicit_scaling.h" +#include 
"shared/source/command_container/walker_partition_xehp_plus.h" +#include "shared/source/command_stream/linear_stream.h" + +namespace NEO { + +template +size_t ImplicitScalingDispatch::getSize(bool nativeCrossTileAtomicSync, + bool preferStaticPartitioning, + const DeviceBitfield &devices, + Vec3 groupStart, + Vec3 groupCount) { + typename GfxFamily::COMPUTE_WALKER::PARTITION_TYPE partitionType{}; + bool staticPartitioning = false; + const uint32_t tileCount = static_cast(devices.count()); + const uint32_t partitionCount = WalkerPartition::computePartitionCountAndPartitionType(tileCount, + preferStaticPartitioning, + groupStart, + groupCount, + {}, + &partitionType, + &staticPartitioning); + UNRECOVERABLE_IF(staticPartitioning && (tileCount != partitionCount)); + + auto synchronizeBeforeExecution = ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired(); + return static_cast(WalkerPartition::estimateSpaceRequiredInCommandBuffer( + false, 16u, synchronizeBeforeExecution, nativeCrossTileAtomicSync, staticPartitioning)); +} + +template +void ImplicitScalingDispatch::dispatchCommands(LinearStream &commandStream, + WALKER_TYPE &walkerCmd, + const DeviceBitfield &devices, + uint32_t &partitionCount, + bool useSecondaryBatchBuffer, + bool nativeCrossTileAtomicSync, + bool usesImages, + uint64_t workPartitionAllocationGpuVa) { + uint32_t totalProgrammedSize = 0u; + const uint32_t tileCount = static_cast(devices.count()); + const bool preferStaticPartitioning = workPartitionAllocationGpuVa != 0u; + + bool staticPartitioning = false; + partitionCount = WalkerPartition::computePartitionCountAndSetPartitionType(&walkerCmd, tileCount, preferStaticPartitioning, usesImages, &staticPartitioning); + const bool synchronizeBeforeExecution = ImplicitScalingHelper::isSynchronizeBeforeExecutionRequired(); + if (staticPartitioning) { + UNRECOVERABLE_IF(tileCount != partitionCount); + WalkerPartition::constructStaticallyPartitionedCommandBuffer(commandStream.getSpace(0u), + 
commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed(), + &walkerCmd, + totalProgrammedSize, + partitionCount, + tileCount, + synchronizeBeforeExecution, + useSecondaryBatchBuffer, + nativeCrossTileAtomicSync, + workPartitionAllocationGpuVa); + } else { + if (DebugManager.flags.ExperimentalSetWalkerPartitionCount.get()) { + partitionCount = DebugManager.flags.ExperimentalSetWalkerPartitionCount.get(); + if (partitionCount == 1u) { + walkerCmd.setPartitionType(GfxFamily::COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED); + } + } + + WalkerPartition::constructDynamicallyPartitionedCommandBuffer(commandStream.getSpace(0u), + commandStream.getGraphicsAllocation()->getGpuAddress() + commandStream.getUsed(), + &walkerCmd, totalProgrammedSize, + partitionCount, tileCount, + false, synchronizeBeforeExecution, useSecondaryBatchBuffer, + nativeCrossTileAtomicSync); + } + commandStream.getSpace(totalProgrammedSize); +} + +} // namespace NEO diff --git a/shared/source/command_container/walker_partition_xehp_plus.h b/shared/source/command_container/walker_partition_xehp_plus.h new file mode 100644 index 0000000000..b9ccb475b9 --- /dev/null +++ b/shared/source/command_container/walker_partition_xehp_plus.h @@ -0,0 +1,730 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "shared/source/command_container/command_encoder.h" +#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/helpers/basic_math.h" +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/helpers/ptr_math.h" + +#include +#include + +namespace WalkerPartition { + +template +using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; +template +using POSTSYNC_DATA = typename GfxFamily::POSTSYNC_DATA; +template +using BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; +template +using BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; +template 
+using LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; +template +using LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM; +template +using MI_SET_PREDICATE = typename GfxFamily::MI_SET_PREDICATE; +template +using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; +template +using MI_ATOMIC = typename GfxFamily::MI_ATOMIC; +template +using DATA_SIZE = typename GfxFamily::MI_ATOMIC::DATA_SIZE; +template +using LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG; +template +using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; +template +using MI_STORE_DATA_IMM = typename GfxFamily::MI_STORE_DATA_IMM; + +constexpr uint32_t wparidCCSOffset = 0x221C; +constexpr uint32_t addressOffsetCCSOffset = 0x23B4; +constexpr uint32_t predicationMaskCCSOffset = 0x21FC; + +constexpr uint32_t generalPurposeRegister0 = 0x2600; +constexpr uint32_t generalPurposeRegister1 = 0x2608; +constexpr uint32_t generalPurposeRegister2 = 0x2610; +constexpr uint32_t generalPurposeRegister3 = 0x2618; +constexpr uint32_t generalPurposeRegister4 = 0x2620; +constexpr uint32_t generalPurposeRegister5 = 0x2628; +constexpr uint32_t generalPurposeRegister6 = 0x2630; + +struct BatchBufferControlData { + uint32_t partitionCount = 0u; + uint32_t tileCount = 0u; + uint32_t inTileCount = 0u; + uint32_t finalSyncTileCount = 0u; +}; +static constexpr inline size_t dynamicPartitioningFieldsForCleanupCount = sizeof(BatchBufferControlData) / sizeof(uint32_t) - 1; + +template +Command *putCommand(void *&inputAddress, uint32_t &totalBytesProgrammed) { + totalBytesProgrammed += sizeof(Command); + auto commandToReturn = reinterpret_cast(inputAddress); + inputAddress = ptrOffset(inputAddress, sizeof(Command)); + return commandToReturn; +} + +bool inline isSemaphoreProgrammingRequired() { + auto semaphoreProgrammingRequired = false; + if (NEO::DebugManager.flags.ExperimentalSynchronizeWithSemaphores.get() == 1) { + semaphoreProgrammingRequired = true; + } + return 
semaphoreProgrammingRequired; +} + +bool inline isCrossTileAtomicRequired() { + auto crossTileAtomicSynchronization = true; + if (NEO::DebugManager.flags.ExperimentalForceCrossAtomicSynchronization.get() == 0) { + crossTileAtomicSynchronization = false; + } + return crossTileAtomicSynchronization; +} + +template +uint32_t computePartitionCountAndPartitionType(uint32_t preferredMinimalPartitionCount, + bool preferStaticPartitioning, + Vec3 groupStart, + Vec3 groupCount, + std::optional::PARTITION_TYPE> requestedPartitionType, + typename COMPUTE_WALKER::PARTITION_TYPE *outSelectedPartitionType, + bool *outSelectStaticPartitioning) { + // For non uniform starting point, there is no support for partition in Hardware. Disable partitioning and select dynamic algorithm + if (groupStart.x || groupStart.y || groupStart.z) { + *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED; + *outSelectStaticPartitioning = false; + return 1u; + } + + size_t workgroupCount = 0u; + bool disablePartitionForPartitionCountOne{}; + + if (NEO::DebugManager.flags.ExperimentalSetWalkerPartitionType.get() != -1) { + requestedPartitionType = static_cast::PARTITION_TYPE>(NEO::DebugManager.flags.ExperimentalSetWalkerPartitionType.get()); + } + + if (requestedPartitionType.has_value()) { + switch (requestedPartitionType.value()) { + case COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X: + workgroupCount = groupCount.x; + break; + case COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y: + workgroupCount = groupCount.y; + break; + case COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z: + workgroupCount = groupCount.z; + break; + default: + UNRECOVERABLE_IF(true); + } + *outSelectedPartitionType = requestedPartitionType.value(); + disablePartitionForPartitionCountOne = false; + } else { + const size_t maxDimension = std::max({groupCount.z, groupCount.y, groupCount.x}); + + auto goWithMaxAlgorithm = !preferStaticPartitioning; + if 
(NEO::DebugManager.flags.WalkerPartitionPreferHighestDimension.get() != -1) { + goWithMaxAlgorithm = !!!NEO::DebugManager.flags.WalkerPartitionPreferHighestDimension.get(); + } + + //compute misaligned %, accept imbalance below threshold in favor of Z/Y/X distribution. + const float minimalThreshold = 0.05f; + float zImbalance = static_cast(groupCount.z - alignDown(groupCount.z, preferredMinimalPartitionCount)) / static_cast(groupCount.z); + float yImbalance = static_cast(groupCount.y - alignDown(groupCount.y, preferredMinimalPartitionCount)) / static_cast(groupCount.y); + + //we first try with deepest dimension to see if we can partition there + if (groupCount.z > 1 && (zImbalance <= minimalThreshold)) { + *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z; + } else if (groupCount.y > 1 && (yImbalance < minimalThreshold)) { + *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y; + } else if (groupCount.x % preferredMinimalPartitionCount == 0) { + *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X; + } + //if we are here then there is no dimension that results in even distribution, choose max dimension to minimize impact + else { + goWithMaxAlgorithm = true; + } + + if (goWithMaxAlgorithm) { + // default mode, select greatest dimension + if (maxDimension == groupCount.x) { + *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X; + } else if (maxDimension == groupCount.y) { + *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y; + } else { + *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z; + } + } + + workgroupCount = maxDimension; + disablePartitionForPartitionCountOne = true; + } + + // Static partitioning - partition count == tile count + *outSelectStaticPartitioning = preferStaticPartitioning; + if (preferStaticPartitioning) { + return preferredMinimalPartitionCount; + } + + // Dynamic partitioning - compute 
optimal partition count + size_t partitionCount = std::min(static_cast(16u), workgroupCount); + partitionCount = Math::prevPowerOfTwo(partitionCount); + if (NEO::DebugManager.flags.SetMinimalPartitionSize.get() != 0) { + const auto workgroupPerPartitionThreshold = NEO::DebugManager.flags.SetMinimalPartitionSize.get() == -1 + ? 512u + : static_cast(NEO::DebugManager.flags.SetMinimalPartitionSize.get()); + preferredMinimalPartitionCount = std::max(2u, preferredMinimalPartitionCount); + + while (partitionCount > preferredMinimalPartitionCount) { + auto workgroupsPerPartition = workgroupCount / partitionCount; + if (workgroupsPerPartition >= workgroupPerPartitionThreshold) { + break; + } + partitionCount = partitionCount / 2; + } + } + + if (partitionCount == 1u && disablePartitionForPartitionCountOne) { + *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED; + } + + return static_cast(partitionCount); +} + +template +uint32_t computePartitionCountAndSetPartitionType(COMPUTE_WALKER *walker, + uint32_t preferredMinimalPartitionCount, + bool preferStaticPartitioning, + bool usesImages, + bool *outSelectStaticPartitioning) { + const Vec3 groupStart = {walker->getThreadGroupIdStartingX(), walker->getThreadGroupIdStartingY(), walker->getThreadGroupIdStartingZ()}; + const Vec3 groupCount = {walker->getThreadGroupIdXDimension(), walker->getThreadGroupIdYDimension(), walker->getThreadGroupIdZDimension()}; + std::optional::PARTITION_TYPE> requestedPartitionType{}; + if (usesImages) { + requestedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X; + } + typename COMPUTE_WALKER::PARTITION_TYPE partitionType{}; + const auto partitionCount = computePartitionCountAndPartitionType(preferredMinimalPartitionCount, + preferStaticPartitioning, + groupStart, + groupCount, + requestedPartitionType, + &partitionType, + outSelectStaticPartitioning); + walker->setPartitionType(partitionType); + return partitionCount; +} + +template +void 
programRegisterWithValue(void *&inputAddress, uint32_t registerOffset, uint32_t &totalBytesProgrammed, uint32_t registerValue) { + auto loadRegisterImmediate = putCommand>(inputAddress, totalBytesProgrammed); + LOAD_REGISTER_IMM cmd = GfxFamily::cmdInitLoadRegisterImm; + + cmd.setRegisterOffset(registerOffset); + cmd.setDataDword(registerValue); + cmd.setMmioRemapEnable(true); + *loadRegisterImmediate = cmd; +} + +template +void programWaitForSemaphore(void *&inputAddress, uint32_t &totalBytesProgrammed, uint64_t gpuAddress, uint32_t semaphoreCompareValue, typename MI_SEMAPHORE_WAIT::COMPARE_OPERATION compareOperation) { + auto semaphoreWait = putCommand>(inputAddress, totalBytesProgrammed); + MI_SEMAPHORE_WAIT cmd = GfxFamily::cmdInitMiSemaphoreWait; + + cmd.setSemaphoreDataDword(semaphoreCompareValue); + cmd.setSemaphoreGraphicsAddress(gpuAddress); + cmd.setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); + cmd.setCompareOperation(compareOperation); + *semaphoreWait = cmd; +} + +template +bool programWparidMask(void *&inputAddress, uint32_t &totalBytesProgrammed, uint32_t partitionCount) { + //currently only power of 2 values of partitionCount are being supported + if (!Math::isPow2(partitionCount) || partitionCount > 16) { + return false; + } + + auto mask = 0xFFE0; + auto fillValue = 0x10; + auto count = partitionCount; + while (count < 16) { + fillValue |= (fillValue >> 1); + count *= 2; + } + mask |= (mask | fillValue); + + programRegisterWithValue(inputAddress, predicationMaskCCSOffset, totalBytesProgrammed, mask); + return true; +} + +template +void programWparidPredication(void *&inputAddress, uint32_t &totalBytesProgrammed, bool predicationEnabled) { + auto miSetPredicate = putCommand>(inputAddress, totalBytesProgrammed); + MI_SET_PREDICATE cmd = GfxFamily::cmdInitSetPredicate; + + if (predicationEnabled) { + cmd.setPredicateEnableWparid(MI_SET_PREDICATE::PREDICATE_ENABLE_WPARID::PREDICATE_ENABLE_WPARID_NOOP_ON_NON_ZERO_VALUE); + } else { + 
cmd.setPredicateEnable(MI_SET_PREDICATE::PREDICATE_ENABLE::PREDICATE_ENABLE_PREDICATE_DISABLE); + } + *miSetPredicate = cmd; +} + +template +void programMiAtomic(void *&inputAddress, uint32_t &totalBytesProgrammed, uint64_t gpuAddress, bool requireReturnValue, typename MI_ATOMIC::ATOMIC_OPCODES atomicOpcode) { + auto miAtomic = putCommand>(inputAddress, totalBytesProgrammed); + NEO::EncodeAtomic::programMiAtomic(miAtomic, gpuAddress, atomicOpcode, DATA_SIZE::DATA_SIZE_DWORD, + requireReturnValue, requireReturnValue, 0x0u, 0x0u); +} + +template +void programMiBatchBufferStart(void *&inputAddress, uint32_t &totalBytesProgrammed, + uint64_t gpuAddress, bool predicationEnabled, bool secondary) { + auto batchBufferStart = putCommand>(inputAddress, totalBytesProgrammed); + BATCH_BUFFER_START cmd = GfxFamily::cmdInitBatchBufferStart; + + cmd.setSecondLevelBatchBuffer(static_cast::SECOND_LEVEL_BATCH_BUFFER>(secondary)); + cmd.setAddressSpaceIndicator(BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR::ADDRESS_SPACE_INDICATOR_PPGTT); + cmd.setPredicationEnable(predicationEnabled); + cmd.setBatchBufferStartAddress(gpuAddress); + *batchBufferStart = cmd; +} + +template +void programMiLoadRegisterReg(void *&inputAddress, uint32_t &totalBytesProgrammed, uint32_t sourceRegisterOffset, uint32_t destinationRegisterOffset) { + auto loadRegisterReg = putCommand>(inputAddress, totalBytesProgrammed); + LOAD_REGISTER_REG cmd = GfxFamily::cmdInitLoadRegisterReg; + + cmd.setMmioRemapEnableSource(true); + cmd.setMmioRemapEnableDestination(true); + cmd.setSourceRegisterAddress(sourceRegisterOffset); + cmd.setDestinationRegisterAddress(destinationRegisterOffset); + *loadRegisterReg = cmd; +} + +template +void programMiLoadRegisterMem(void *&inputAddress, uint32_t &totalBytesProgrammed, uint64_t gpuAddressToLoad, uint32_t destinationRegisterOffset) { + auto loadRegisterReg = putCommand>(inputAddress, totalBytesProgrammed); + LOAD_REGISTER_MEM cmd = GfxFamily::cmdInitLoadRegisterMem; + + 
cmd.setMmioRemapEnable(true); + cmd.setMemoryAddress(gpuAddressToLoad); + cmd.setRegisterAddress(destinationRegisterOffset); + *loadRegisterReg = cmd; +} + +template +void programPipeControlCommand(void *&inputAddress, uint32_t &totalBytesProgrammed, bool dcFlush) { + auto pipeControl = putCommand>(inputAddress, totalBytesProgrammed); + PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; + + if (NEO::MemorySynchronizationCommands::isDcFlushAllowed()) { + cmd.setDcFlushEnable(dcFlush); + } + if (NEO::DebugManager.flags.DoNotFlushCaches.get()) { + cmd.setDcFlushEnable(false); + } + *pipeControl = cmd; +} + +template +void programStoreMemImmediateDword(void *&inputAddress, uint32_t &totalBytesProgrammed, uint64_t gpuAddress, uint32_t data) { + auto storeDataImmediate = putCommand>(inputAddress, totalBytesProgrammed); + MI_STORE_DATA_IMM cmd = GfxFamily::cmdInitStoreDataImm; + + cmd.setAddress(gpuAddress); + cmd.setStoreQword(false); + cmd.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD); + cmd.setDataDword0(static_cast(data)); + + *storeDataImmediate = cmd; +} + +template +void programNativeCrossTileSyncControl(void *&inputAddress, + uint32_t &totalBytesProgrammed, + uint64_t finalSyncTileCountField) { + programStoreMemImmediateDword(inputAddress, + totalBytesProgrammed, + finalSyncTileCountField, + 0u); +} + +template +void programNativeCrossTileSyncCleanup(void *&inputAddress, + uint32_t &totalBytesProgrammed, + uint64_t finalSyncTileCountAddress, + uint64_t baseAddressForCleanup, + size_t fieldsForCleanupCount, + uint32_t tileCount) { + // Synchronize tiles, so the fields are not cleared while still in use + programMiAtomic(inputAddress, totalBytesProgrammed, finalSyncTileCountAddress, false, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT); + programWaitForSemaphore(inputAddress, totalBytesProgrammed, finalSyncTileCountAddress, tileCount, MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); + + for (auto 
fieldIndex = 0u; fieldIndex < fieldsForCleanupCount; fieldIndex++) { + const uint64_t addressForCleanup = baseAddressForCleanup + fieldIndex * sizeof(uint32_t); + programStoreMemImmediateDword(inputAddress, + totalBytesProgrammed, + addressForCleanup, + 0u); + } + + //this synchronization point ensures that all tiles finished zeroing and will fairly access control section atomic variables + programMiAtomic(inputAddress, totalBytesProgrammed, finalSyncTileCountAddress, false, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT); + programWaitForSemaphore(inputAddress, totalBytesProgrammed, finalSyncTileCountAddress, 2 * tileCount, MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); +} + +template +void programTilesSynchronizationWithPostSyncs(void *¤tBatchBufferPointer, + uint32_t &totalBytesProgrammed, + COMPUTE_WALKER *inputWalker, + uint32_t partitionCount) { + const auto postSyncAddress = inputWalker->getPostSync().getDestinationAddress() + 8llu; + for (uint32_t partitionId = 0u; partitionId < partitionCount; partitionId++) { + programWaitForSemaphore(currentBatchBufferPointer, totalBytesProgrammed, postSyncAddress + partitionId * 16llu, 1u, MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); + } +} + +template +void programTilesSynchronizationWithAtomics(void *¤tBatchBufferPointer, + uint32_t &totalBytesProgrammed, + uint64_t atomicAddress, + uint32_t tileCount) { + programMiAtomic(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, false, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT); + programWaitForSemaphore(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, tileCount, MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); +} + +template +uint64_t computeWalkerSectionSize() { + return sizeof(BATCH_BUFFER_START) + + sizeof(COMPUTE_WALKER); +} + +template +uint64_t computeNativeCrossTileSyncControlSectionSize() { + return 
sizeof(MI_STORE_DATA_IMM); +} + +template +uint64_t computeNativeCrossTileSyncCleanupSectionSize(size_t fieldsForCleanupCount) { + return fieldsForCleanupCount * sizeof(MI_STORE_DATA_IMM) + + 2 * sizeof(MI_ATOMIC) + + 2 * sizeof(MI_SEMAPHORE_WAIT); +} + +template +uint64_t computeControlSectionOffset(uint32_t partitionCount, bool synchronizeBeforeExecution, bool nativeCrossTileAtomicSync) { + auto synchronizationCount = (synchronizeBeforeExecution) ? 2u : 1u; + if (!isCrossTileAtomicRequired() && !nativeCrossTileAtomicSync) { + synchronizationCount--; + } + + return sizeof(LOAD_REGISTER_IMM) + + sizeof(MI_ATOMIC) * (1u + synchronizationCount) + + sizeof(LOAD_REGISTER_REG) + + sizeof(MI_SET_PREDICATE) * 2 + + sizeof(BATCH_BUFFER_START) * 2 + + sizeof(PIPE_CONTROL) + + sizeof(MI_SEMAPHORE_WAIT) * synchronizationCount + + (isSemaphoreProgrammingRequired() ? sizeof(MI_SEMAPHORE_WAIT) * partitionCount : 0u) + + computeWalkerSectionSize() + + (nativeCrossTileAtomicSync ? computeNativeCrossTileSyncControlSectionSize() : 0u); +} + +template +uint64_t computeWalkerSectionStart(uint32_t partitionCount, + bool synchronizeBeforeExecution, + bool nativeCrossTileAtomicSync) { + return computeControlSectionOffset(partitionCount, synchronizeBeforeExecution, nativeCrossTileAtomicSync) - + computeWalkerSectionSize(); +} + +template +void programPartitionedWalker(void *&inputAddress, uint32_t &totalBytesProgrammed, + COMPUTE_WALKER *inputWalker, + uint32_t partitionCount) { + auto computeWalker = putCommand>(inputAddress, totalBytesProgrammed); + COMPUTE_WALKER cmd = *inputWalker; + + if (partitionCount > 1) { + auto partitionType = inputWalker->getPartitionType(); + + assert(inputWalker->getThreadGroupIdStartingX() == 0u); + assert(inputWalker->getThreadGroupIdStartingY() == 0u); + assert(inputWalker->getThreadGroupIdStartingZ() == 0u); + assert(partitionType != COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_DISABLED); + + cmd.setWorkloadPartitionEnable(true); + + auto 
workgroupCount = 0u; + if (partitionType == COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X) { + workgroupCount = inputWalker->getThreadGroupIdXDimension(); + } else if (partitionType == COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Y) { + workgroupCount = inputWalker->getThreadGroupIdYDimension(); + } else { + workgroupCount = inputWalker->getThreadGroupIdZDimension(); + } + + cmd.setPartitionSize((workgroupCount + partitionCount - 1u) / partitionCount); + } + *computeWalker = cmd; +} + +/* SAMPLE COMMAND BUFFER STRUCTURE, birds eye view for 16 partitions, 4 tiles +//inital setup section +1. MI_LOAD_REGISTER(PREDICATION_MASK, active partition mask ) +//loop 1 - loop as long as there are partitions to be serviced +2. MI_ATOMIC_INC( ATOMIC LOCATION #31 within CMD buffer ) +3. MI_LOAD_REGISTER_REG ( ATOMIC RESULT -> WPARID ) +4. MI_SET_PREDICATE( WPARID MODE ) +5. BATCH_BUFFER_START( LOCATION #28 ) // this will not be executed if partition outside of active virtual partitions +//loop 1 ends here, if we are here it means there are no more partitions +6. MI_SET_PREDICATE ( OFF ) +//Walker synchronization section starts here, make sure that Walker is done +7, PIPE_CONTROL ( DC_FLUSH ) +//wait for all post syncs to make sure whole work is done, caller needs to set them to 1. +//now epilogue starts synchro all engines prior to coming back to RING, this will be once per command buffer to make sure that all engines actually passed via cmd buffer. +//epilogue section, make sure every tile completed prior to continuing +//This is cross-tile synchronization +24. ATOMIC_INC( LOCATION #31) +25. WAIT_FOR_SEMAPHORE ( LOCATION #31, LOWER THEN 4 ) // wait till all tiles hit atomic +26. PIPE_CONTROL ( TAG UPDATE ) (not implemented) +27. BATCH_BUFFER_STAT (LOCATION #32) // go to the very end +//Walker section +28. COMPUTE_WALKER +29. 
BATCH BUFFER_START ( GO BACK TO #2) +//Batch Buffer Control Data section, there are no real commands here but we have memory here +//That will be updated via atomic operations. +30. uint32_t virtualPartitionID //atomic location +31. uint32_t completionTileID //all tiles needs to report completion +32. BATCH_BUFFER_END ( optional ) +*/ + +template +void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer, + uint64_t gpuAddressOfAllocation, + COMPUTE_WALKER *inputWalker, + uint32_t &totalBytesProgrammed, + uint32_t partitionCount, + uint32_t tileCount, + bool emitBatchBufferEnd, + bool synchronizeBeforeExecution, + bool secondaryBatchBuffer, + bool nativeCrossTileAtomicSync) { + totalBytesProgrammed = 0u; + void *currentBatchBufferPointer = cpuPointer; + + auto controlSectionOffset = computeControlSectionOffset(partitionCount, synchronizeBeforeExecution, nativeCrossTileAtomicSync); + if (synchronizeBeforeExecution) { + auto tileAtomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, inTileCount); + programMiAtomic(currentBatchBufferPointer, totalBytesProgrammed, tileAtomicAddress, false, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT); + + //if all tiles hit the atomic, it means we may go further + programWaitForSemaphore(currentBatchBufferPointer, totalBytesProgrammed, tileAtomicAddress, tileCount, MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); + } + + programWparidMask(currentBatchBufferPointer, totalBytesProgrammed, partitionCount); + + programMiAtomic(currentBatchBufferPointer, + totalBytesProgrammed, + gpuAddressOfAllocation + controlSectionOffset, + true, + MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT); + + //move atomic result to wparid + programMiLoadRegisterReg(currentBatchBufferPointer, totalBytesProgrammed, generalPurposeRegister4, wparidCCSOffset); + + //enable predication basing on wparid value + programWparidPredication(currentBatchBufferPointer, 
totalBytesProgrammed, true); + + programMiBatchBufferStart(currentBatchBufferPointer, + totalBytesProgrammed, + gpuAddressOfAllocation + + computeWalkerSectionStart(partitionCount, + synchronizeBeforeExecution, + nativeCrossTileAtomicSync), + true, + secondaryBatchBuffer); + + //disable predication to not noop subsequent commands. + programWparidPredication(currentBatchBufferPointer, totalBytesProgrammed, false); + + if (nativeCrossTileAtomicSync) { + const auto finalSyncTileCountField = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, finalSyncTileCount); + programNativeCrossTileSyncControl(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountField); + } + + programPipeControlCommand(currentBatchBufferPointer, totalBytesProgrammed, true); + + if (isSemaphoreProgrammingRequired()) { + auto postSyncAddress = inputWalker->getPostSync().getDestinationAddress() + 8llu; + for (uint32_t partitionId = 0u; partitionId < partitionCount; partitionId++) { + programWaitForSemaphore(currentBatchBufferPointer, totalBytesProgrammed, postSyncAddress + partitionId * 16llu, 1u, MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); + } + } + + if (isCrossTileAtomicRequired() || nativeCrossTileAtomicSync) { + auto tileAtomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, tileCount); + programMiAtomic(currentBatchBufferPointer, totalBytesProgrammed, tileAtomicAddress, false, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT); + + //if all tiles hit the atomic, it means we may go further + programWaitForSemaphore(currentBatchBufferPointer, totalBytesProgrammed, tileAtomicAddress, tileCount, MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); + } + + //this bb start goes to the end of partitioned command buffer + programMiBatchBufferStart( + currentBatchBufferPointer, + totalBytesProgrammed, + gpuAddressOfAllocation + controlSectionOffset + 
sizeof(BatchBufferControlData), + false, + secondaryBatchBuffer); + + //Walker section + programPartitionedWalker(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, partitionCount); + + programMiBatchBufferStart(currentBatchBufferPointer, totalBytesProgrammed, gpuAddressOfAllocation, false, secondaryBatchBuffer); + + auto controlSection = reinterpret_cast(ptrOffset(cpuPointer, static_cast(controlSectionOffset))); + controlSection->partitionCount = 0u; + controlSection->tileCount = 0u; + controlSection->inTileCount = 0u; + controlSection->finalSyncTileCount = 0u; + totalBytesProgrammed += sizeof(BatchBufferControlData); + currentBatchBufferPointer = ptrOffset(currentBatchBufferPointer, sizeof(BatchBufferControlData)); + + if (nativeCrossTileAtomicSync) { + const auto finalSyncTileCountAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(BatchBufferControlData, finalSyncTileCount); + programNativeCrossTileSyncCleanup(currentBatchBufferPointer, + totalBytesProgrammed, + finalSyncTileCountAddress, + gpuAddressOfAllocation + controlSectionOffset, + dynamicPartitioningFieldsForCleanupCount, + tileCount); + } + + if (emitBatchBufferEnd) { + auto batchBufferEnd = putCommand>(currentBatchBufferPointer, totalBytesProgrammed); + *batchBufferEnd = GfxFamily::cmdInitBatchBufferEnd; + } +} + +struct StaticPartitioningControlSection { + uint32_t synchronizeBeforeWalkerCounter = 0; + uint32_t synchronizeAfterWalkerCounter = 0; + uint32_t finalSyncTileCounter = 0; +}; +static constexpr inline size_t staticPartitioningFieldsForCleanupCount = sizeof(StaticPartitioningControlSection) / sizeof(uint32_t) - 1; + +template +uint64_t computeStaticPartitioningControlSectionOffset(uint32_t partitionCount, bool synchronizeBeforeExecution, bool nativeCrossTileAtomicSync) { + const auto beforeExecutionSyncAtomicSize = synchronizeBeforeExecution ? 
(sizeof(MI_SEMAPHORE_WAIT) + sizeof(MI_ATOMIC)) : 0u; + const auto afterExecutionSyncAtomicSize = (isCrossTileAtomicRequired() || nativeCrossTileAtomicSync) ? (sizeof(MI_SEMAPHORE_WAIT) + sizeof(MI_ATOMIC)) : 0u; + const auto afterExecutionSyncPostSyncSize = isSemaphoreProgrammingRequired() ? sizeof(MI_SEMAPHORE_WAIT) * partitionCount : 0u; + const auto nativeCrossTileSyncSize = nativeCrossTileAtomicSync ? sizeof(MI_STORE_DATA_IMM) : 0u; + return beforeExecutionSyncAtomicSize + + sizeof(LOAD_REGISTER_MEM) + + sizeof(PIPE_CONTROL) + + sizeof(COMPUTE_WALKER) + + nativeCrossTileSyncSize + + afterExecutionSyncAtomicSize + + afterExecutionSyncPostSyncSize + + sizeof(BATCH_BUFFER_START); +} + +template +void constructStaticallyPartitionedCommandBuffer(void *cpuPointer, + uint64_t gpuAddressOfAllocation, + COMPUTE_WALKER *inputWalker, + uint32_t &totalBytesProgrammed, + uint32_t partitionCount, + uint32_t tileCount, + bool synchronizeBeforeExecution, + bool secondaryBatchBuffer, + bool nativeCrossTileAtomicSync, + uint64_t workPartitionAllocationGpuVa) { + totalBytesProgrammed = 0u; + void *currentBatchBufferPointer = cpuPointer; + + // Get address of the control section + const auto controlSectionOffset = computeStaticPartitioningControlSectionOffset(partitionCount, synchronizeBeforeExecution, nativeCrossTileAtomicSync); + const auto afterControlSectionOffset = controlSectionOffset + sizeof(StaticPartitioningControlSection); + + // Synchronize tiles before walker + if (synchronizeBeforeExecution) { + const auto atomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeBeforeWalkerCounter); + programTilesSynchronizationWithAtomics(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, tileCount); + } + + // Load partition ID to wparid register and execute walker + programMiLoadRegisterMem(currentBatchBufferPointer, totalBytesProgrammed, workPartitionAllocationGpuVa, wparidCCSOffset); + 
programPartitionedWalker(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, partitionCount); + programPipeControlCommand(currentBatchBufferPointer, totalBytesProgrammed, true); // flush L3 cache + + // Prepare for cleanup section + if (nativeCrossTileAtomicSync) { + const auto finalSyncTileCountField = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); + programNativeCrossTileSyncControl(currentBatchBufferPointer, totalBytesProgrammed, finalSyncTileCountField); + } + + // Synchronize tiles after walker + if (isSemaphoreProgrammingRequired()) { + programTilesSynchronizationWithPostSyncs(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, partitionCount); + } + if (isCrossTileAtomicRequired() || nativeCrossTileAtomicSync) { + const auto atomicAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, synchronizeAfterWalkerCounter); + programTilesSynchronizationWithAtomics(currentBatchBufferPointer, totalBytesProgrammed, atomicAddress, tileCount); + } + + // Jump over the control section + programMiBatchBufferStart(currentBatchBufferPointer, totalBytesProgrammed, gpuAddressOfAllocation + afterControlSectionOffset, false, secondaryBatchBuffer); + + // Control section + DEBUG_BREAK_IF(totalBytesProgrammed != controlSectionOffset); + StaticPartitioningControlSection *controlSection = putCommand(currentBatchBufferPointer, totalBytesProgrammed); + controlSection->synchronizeBeforeWalkerCounter = 0u; + controlSection->synchronizeAfterWalkerCounter = 0u; + controlSection->finalSyncTileCounter = 0u; + DEBUG_BREAK_IF(totalBytesProgrammed != afterControlSectionOffset); + + // Cleanup section + if (nativeCrossTileAtomicSync) { + const auto finalSyncTileCountAddress = gpuAddressOfAllocation + controlSectionOffset + offsetof(StaticPartitioningControlSection, finalSyncTileCounter); + programNativeCrossTileSyncCleanup(currentBatchBufferPointer, + 
totalBytesProgrammed, + finalSyncTileCountAddress, + gpuAddressOfAllocation + controlSectionOffset, + staticPartitioningFieldsForCleanupCount, + tileCount); + } +} + +template +uint64_t estimateSpaceRequiredInCommandBuffer(bool requiresBatchBufferEnd, + uint32_t partitionCount, + bool synchronizeBeforeExecution, + bool nativeCrossTileAtomicSync, + bool staticPartitioning) { + uint64_t size = {}; + if (staticPartitioning) { + size += computeStaticPartitioningControlSectionOffset(partitionCount, synchronizeBeforeExecution, nativeCrossTileAtomicSync); + size += sizeof(StaticPartitioningControlSection); + size += nativeCrossTileAtomicSync ? computeNativeCrossTileSyncCleanupSectionSize(staticPartitioningFieldsForCleanupCount) : 0u; + } else { + size += computeControlSectionOffset(partitionCount, synchronizeBeforeExecution, nativeCrossTileAtomicSync); + size += sizeof(BatchBufferControlData); + size += requiresBatchBufferEnd ? sizeof(BATCH_BUFFER_END) : 0u; + size += nativeCrossTileAtomicSync ? 
computeNativeCrossTileSyncCleanupSectionSize(dynamicPartitioningFieldsForCleanupCount) : 0u; + } + return size; +} + +} // namespace WalkerPartition diff --git a/shared/source/command_stream/CMakeLists.txt b/shared/source/command_stream/CMakeLists.txt index 62b18cd366..08f8c09c04 100644 --- a/shared/source/command_stream/CMakeLists.txt +++ b/shared/source/command_stream/CMakeLists.txt @@ -46,5 +46,14 @@ set(NEO_CORE_COMMAND_STREAM ${CMAKE_CURRENT_SOURCE_DIR}/thread_arbitration_policy.h ) +if(SUPPORT_XEHP_PLUS) + list(APPEND NEO_CORE_COMMAND_STREAM + ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_xehp_plus.inl + ${CMAKE_CURRENT_SOURCE_DIR}/preemption_xehp_plus.inl + ${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_xehp_plus.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_xehp_plus.h + ) +endif() + set_property(GLOBAL PROPERTY NEO_CORE_COMMAND_STREAM ${NEO_CORE_COMMAND_STREAM}) add_subdirectories() diff --git a/shared/source/command_stream/command_stream_receiver_hw_xehp_plus.inl b/shared/source/command_stream/command_stream_receiver_hw_xehp_plus.inl new file mode 100644 index 0000000000..47ebbe4d43 --- /dev/null +++ b/shared/source/command_stream/command_stream_receiver_hw_xehp_plus.inl @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_stream/command_stream_receiver_hw_base.inl" +#include "shared/source/command_stream/device_command_stream.h" +#include "shared/source/command_stream/scratch_space_controller_xehp_plus.h" +#include "shared/source/gmm_helper/gmm.h" +#include "shared/source/helpers/preamble.h" +#include "shared/source/kernel/grf_config.h" +#include "shared/source/os_interface/os_interface.h" + +namespace NEO { + +template +size_t CommandStreamReceiverHw::getSshHeapSize() { return 2 * MB; } + +template +bool CommandStreamReceiverHw::are4GbHeapsAvailable() const { return is64bit; } + +template +void 
CommandStreamReceiverHw::programL3(LinearStream &csr, DispatchFlags &dispatchFlags, uint32_t &newL3Config) {} + +template +size_t CommandStreamReceiverHw::getRequiredStateBaseAddressSize() const { + return sizeof(typename GfxFamily::STATE_BASE_ADDRESS) + sizeof(typename GfxFamily::_3DSTATE_BINDING_TABLE_POOL_ALLOC) + + sizeof(PIPE_CONTROL); +} + +template +size_t CommandStreamReceiverHw::getCmdSizeForL3Config() const { return 0; } + +template +size_t CommandStreamReceiverHw::getCmdSizeForComputeMode() { + if (!csrSizeRequestFlags.hasSharedHandles) { + for (const auto &allocation : this->getResidencyAllocations()) { + if (allocation->peekSharedHandle()) { + csrSizeRequestFlags.hasSharedHandles = true; + break; + } + } + } + + size_t size = 0; + if (isComputeModeNeeded()) { + size += sizeof(typename GfxFamily::STATE_COMPUTE_MODE); + if (csrSizeRequestFlags.hasSharedHandles) { + size += sizeof(typename GfxFamily::PIPE_CONTROL); + } + } + return size; +} + +template +void CommandStreamReceiverHw::programPipelineSelect(LinearStream &commandStream, PipelineSelectArgs &pipelineSelectArgs) { + if (csrSizeRequestFlags.mediaSamplerConfigChanged || csrSizeRequestFlags.specialPipelineSelectModeChanged || !isPreambleSent) { + PreambleHelper::programPipelineSelect(&commandStream, pipelineSelectArgs, peekHwInfo()); + this->lastMediaSamplerConfig = pipelineSelectArgs.mediaSamplerRequired; + this->lastSpecialPipelineSelectMode = pipelineSelectArgs.specialPipelineSelectMode; + } +} + +template +void CommandStreamReceiverHw::createScratchSpaceController() { + scratchSpaceController = std::make_unique(this->rootDeviceIndex, executionEnvironment, *internalAllocationStorage.get()); +} + +template +void CommandStreamReceiverHw::programEpliogueCommands(LinearStream &csr, const DispatchFlags &dispatchFlags) { + this->programEngineModeEpliogue(csr, dispatchFlags); +} + +template +size_t CommandStreamReceiverHw::getCmdSizeForEpilogueCommands(const DispatchFlags &dispatchFlags) const { + 
return this->getCmdSizeForEngineMode(dispatchFlags); +} + +template +bool CommandStreamReceiverHw::isMultiOsContextCapable() const { + return deviceBitfield.count() > 1u; +} + +template +class ImplicitFlushSettings { + public: + static bool &getSettingForNewResource() { + return defaultSettingForNewResource; + } + static bool &getSettingForGpuIdle() { + return defaultSettingForGpuIdle; + } + + private: + static bool defaultSettingForNewResource; + static bool defaultSettingForGpuIdle; +}; + +template +bool CommandStreamReceiverHw::checkPlatformSupportsNewResourceImplicitFlush() const { + if (this->isMultiOsContextCapable()) { + return false; + } + return ImplicitFlushSettings::getSettingForNewResource() + ? getOSInterface()->newResourceImplicitFlush + : false; +} + +template +bool CommandStreamReceiverHw::checkPlatformSupportsGpuIdleImplicitFlush() const { + if (this->isMultiOsContextCapable() && !this->osContext->isDirectSubmissionActive()) { + return false; + } + return ImplicitFlushSettings::getSettingForGpuIdle() + ? 
getOSInterface()->gpuIdleImplicitFlush + : false; +} + +template +GraphicsAllocation *CommandStreamReceiverHw::getClearColorAllocation() { + return nullptr; +} + +} // namespace NEO diff --git a/shared/source/command_stream/preemption_xehp_plus.inl b/shared/source/command_stream/preemption_xehp_plus.inl new file mode 100644 index 0000000000..85e5c7096d --- /dev/null +++ b/shared/source/command_stream/preemption_xehp_plus.inl @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2016-2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +template <> +void PreemptionHelper::programCsrBaseAddress(LinearStream &preambleCmdStream, Device &device, const GraphicsAllocation *preemptionCsr) { +} + +template <> +void PreemptionHelper::programStateSip(LinearStream &preambleCmdStream, Device &device) { + using STATE_SIP = typename GfxFamily::STATE_SIP; + using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; + bool debuggingEnabled = device.getDebugger() != nullptr; + + if (debuggingEnabled) { + HwHelper &hwHelper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily); + auto sipAllocation = SipKernel::getSipKernel(device).getSipAllocation(); + + if (hwHelper.isSipWANeeded(device.getHardwareInfo())) { + auto mmio = reinterpret_cast(preambleCmdStream.getSpace(sizeof(MI_LOAD_REGISTER_IMM))); + MI_LOAD_REGISTER_IMM cmd = GfxFamily::cmdInitLoadRegisterImm; + + UNRECOVERABLE_IF((sipAllocation->getGpuAddressToPatch() & uint64_t(0xffffffff00000000)) != 0); + + uint32_t globalSip = static_cast(sipAllocation->getGpuAddressToPatch() & uint32_t(-1)); + globalSip &= 0xfffffff8; + globalSip |= 1; + cmd.setDataDword(globalSip); + cmd.setRegisterOffset(GlobalSipRegister::registerOffset); + *mmio = cmd; + } else { + auto sip = reinterpret_cast(preambleCmdStream.getSpace(sizeof(STATE_SIP))); + STATE_SIP cmd = GfxFamily::cmdInitStateSip; + cmd.setSystemInstructionPointer(sipAllocation->getGpuAddressToPatch()); + *sip = cmd; + } + } +} + +template <> +void 
PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, Device &device) { + using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; + bool debuggingEnabled = device.getDebugger() != nullptr; + + if (debuggingEnabled) { + HwHelper &hwHelper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily); + if (hwHelper.isSipWANeeded(device.getHardwareInfo())) { + + NEO::PipeControlArgs args(false); + NEO::MemorySynchronizationCommands::addPipeControl(cmdStream, args); + + auto mmio = reinterpret_cast(cmdStream.getSpace(sizeof(MI_LOAD_REGISTER_IMM))); + MI_LOAD_REGISTER_IMM cmd = GfxFamily::cmdInitLoadRegisterImm; + uint32_t globalSip = 0; + cmd.setDataDword(globalSip); + cmd.setRegisterOffset(GlobalSipRegister::registerOffset); + *mmio = cmd; + } + } +} + +template <> +size_t PreemptionHelper::getRequiredPreambleSize(const Device &device) { + return 0u; +} + +template <> +size_t PreemptionHelper::getRequiredStateSipCmdSize(const Device &device) { + size_t size = 0; + bool debuggingEnabled = device.getDebugger() != nullptr || device.isDebuggerActive(); + if (debuggingEnabled) { + HwHelper &hwHelper = HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily); + + if (hwHelper.isSipWANeeded(device.getHardwareInfo())) { + size += sizeof(typename GfxFamily::PIPE_CONTROL); + size += 2 * sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM); + } else { + size += sizeof(typename GfxFamily::STATE_SIP); + } + } + return size; +} diff --git a/shared/source/command_stream/scratch_space_controller_xehp_plus.cpp b/shared/source/command_stream/scratch_space_controller_xehp_plus.cpp new file mode 100644 index 0000000000..1e460b19a8 --- /dev/null +++ b/shared/source/command_stream/scratch_space_controller_xehp_plus.cpp @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_stream/scratch_space_controller_xehp_plus.h" + +#include 
"shared/source/command_stream/command_stream_receiver.h" +#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/execution_environment/execution_environment.h" +#include "shared/source/execution_environment/root_device_environment.h" +#include "shared/source/helpers/aligned_memory.h" +#include "shared/source/helpers/api_specific_config.h" +#include "shared/source/helpers/constants.h" +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/memory_manager/graphics_allocation.h" +#include "shared/source/memory_manager/internal_allocation_storage.h" +#include "shared/source/memory_manager/memory_manager.h" +#include "shared/source/os_interface/os_context.h" + +namespace NEO { +ScratchSpaceControllerXeHPPlus::ScratchSpaceControllerXeHPPlus(uint32_t rootDeviceIndex, + ExecutionEnvironment &environment, + InternalAllocationStorage &allocationStorage) + : ScratchSpaceController(rootDeviceIndex, environment, allocationStorage) { + auto &hwHelper = HwHelper::get(environment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->platform.eRenderCoreFamily); + singleSurfaceStateSize = hwHelper.getRenderSurfaceStateSize(); + if (DebugManager.flags.EnablePrivateScratchSlot1.get() != -1) { + privateScratchSpaceSupported = !!DebugManager.flags.EnablePrivateScratchSlot1.get(); + } + if (privateScratchSpaceSupported) { + ScratchSpaceControllerXeHPPlus::stateSlotsCount *= 2; + } +} + +void ScratchSpaceControllerXeHPPlus::setNewSshPtr(void *newSsh, bool &cfeDirty, bool changeId) { + if (surfaceStateHeap != newSsh) { + surfaceStateHeap = static_cast(newSsh); + if (scratchAllocation == nullptr) { + cfeDirty = false; + } else { + if (changeId) { + slotId = 0; + } + programSurfaceState(); + cfeDirty = true; + } + } +} + +void ScratchSpaceControllerXeHPPlus::setRequiredScratchSpace(void *sshBaseAddress, + uint32_t offset, + uint32_t requiredPerThreadScratchSize, + uint32_t requiredPerThreadPrivateScratchSize, + uint32_t 
currentTaskCount, + OsContext &osContext, + bool &stateBaseAddressDirty, + bool &vfeStateDirty) { + setNewSshPtr(sshBaseAddress, vfeStateDirty, offset == 0 ? true : false); + bool scratchSurfaceDirty; + prepareScratchAllocation(requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty); + if (scratchSurfaceDirty) { + vfeStateDirty = true; + updateSlots = true; + programSurfaceState(); + } +} + +void ScratchSpaceControllerXeHPPlus::programSurfaceState() { + if (updateSlots) { + slotId++; + } + UNRECOVERABLE_IF(slotId >= stateSlotsCount); + UNRECOVERABLE_IF(scratchAllocation == nullptr && privateScratchAllocation == nullptr); + + void *surfaceStateForScratchAllocation = ptrOffset(static_cast(surfaceStateHeap), getOffsetToSurfaceState(slotId + sshOffset)); + programSurfaceStateAtPtr(surfaceStateForScratchAllocation); +} + +void ScratchSpaceControllerXeHPPlus::programSurfaceStateAtPtr(void *surfaceStateForScratchAllocation) { + auto &hwHelper = HwHelper::get(executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->platform.eRenderCoreFamily); + uint64_t scratchAllocationAddress = 0u; + if (scratchAllocation) { + scratchAllocationAddress = scratchAllocation->getGpuAddress(); + } + hwHelper.setRenderSurfaceStateForBuffer(*executionEnvironment.rootDeviceEnvironments[rootDeviceIndex], + surfaceStateForScratchAllocation, computeUnitsUsedForScratch, scratchAllocationAddress, 0, + perThreadScratchSize, nullptr, false, scratchType, false, true); + + if (privateScratchSpaceSupported) { + void *surfaceStateForPrivateScratchAllocation = ptrOffset(surfaceStateForScratchAllocation, singleSurfaceStateSize); + uint64_t privateScratchAllocationAddress = 0u; + + if (privateScratchAllocation) { + privateScratchAllocationAddress = privateScratchAllocation->getGpuAddress(); + } + 
hwHelper.setRenderSurfaceStateForBuffer(*executionEnvironment.rootDeviceEnvironments[rootDeviceIndex], + surfaceStateForPrivateScratchAllocation, computeUnitsUsedForScratch, + privateScratchAllocationAddress, 0, perThreadPrivateScratchSize, nullptr, false, + scratchType, false, true); + } +} + +uint64_t ScratchSpaceControllerXeHPPlus::calculateNewGSH() { + return 0u; +} +uint64_t ScratchSpaceControllerXeHPPlus::getScratchPatchAddress() { + uint64_t scratchAddress = 0u; + if (scratchAllocation || privateScratchAllocation) { + if (ApiSpecificConfig::getBindlessConfiguration()) { + scratchAddress = bindlessSS.surfaceStateOffset; + } else { + scratchAddress = static_cast(getOffsetToSurfaceState(slotId + sshOffset)); + } + } + return scratchAddress; +} + +size_t ScratchSpaceControllerXeHPPlus::getOffsetToSurfaceState(uint32_t requiredSlotCount) const { + auto offset = requiredSlotCount * singleSurfaceStateSize; + if (privateScratchSpaceSupported) { + offset *= 2; + } + return offset; +} + +void ScratchSpaceControllerXeHPPlus::reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) { + if (heapType == IndirectHeap::SURFACE_STATE) { + indirectHeap->getSpace(getOffsetToSurfaceState(stateSlotsCount)); + } +} +void ScratchSpaceControllerXeHPPlus::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, + uint32_t requiredPerThreadScratchSize, + uint32_t requiredPerThreadPrivateScratchSize, + uint32_t currentTaskCount, + OsContext &osContext, + bool &stateBaseAddressDirty, + bool &vfeStateDirty, + NEO::CommandStreamReceiver *csr) { + bool scratchSurfaceDirty; + prepareScratchAllocation(requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty); + if (scratchSurfaceDirty) { + bindlessSS = heapsHelper->allocateSSInHeap(singleSurfaceStateSize * (privateScratchSpaceSupported ? 
2 : 1), scratchAllocation, BindlessHeapsHelper::SCRATCH_SSH); + programSurfaceStateAtPtr(bindlessSS.ssPtr); + vfeStateDirty = true; + } + csr->makeResident(*bindlessSS.heapAllocation); +} + +void ScratchSpaceControllerXeHPPlus::prepareScratchAllocation(uint32_t requiredPerThreadScratchSize, + uint32_t requiredPerThreadPrivateScratchSize, + uint32_t currentTaskCount, + OsContext &osContext, + bool &stateBaseAddressDirty, + bool &scratchSurfaceDirty, + bool &vfeStateDirty) { + uint32_t requiredPerThreadScratchSizeAlignedUp = alignUp(requiredPerThreadScratchSize, 64); + size_t requiredScratchSizeInBytes = requiredPerThreadScratchSizeAlignedUp * computeUnitsUsedForScratch; + scratchSurfaceDirty = false; + auto multiTileCapable = osContext.getNumSupportedDevices() > 1; + if (scratchSizeBytes < requiredScratchSizeInBytes) { + if (scratchAllocation) { + scratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId()); + csrAllocationStorage.storeAllocation(std::unique_ptr(scratchAllocation), TEMPORARY_ALLOCATION); + } + scratchSurfaceDirty = true; + scratchSizeBytes = requiredScratchSizeInBytes; + perThreadScratchSize = requiredPerThreadScratchSizeAlignedUp; + AllocationProperties properties{this->rootDeviceIndex, true, scratchSizeBytes, GraphicsAllocation::AllocationType::SCRATCH_SURFACE, multiTileCapable, false, osContext.getDeviceBitfield()}; + scratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); + } + if (privateScratchSpaceSupported) { + uint32_t requiredPerThreadPrivateScratchSizeAlignedUp = alignUp(requiredPerThreadPrivateScratchSize, 64); + size_t requiredPrivateScratchSizeInBytes = requiredPerThreadPrivateScratchSizeAlignedUp * computeUnitsUsedForScratch; + if (privateScratchSizeBytes < requiredPrivateScratchSizeInBytes) { + if (privateScratchAllocation) { + privateScratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId()); + 
csrAllocationStorage.storeAllocation(std::unique_ptr(privateScratchAllocation), TEMPORARY_ALLOCATION); + } + privateScratchSizeBytes = requiredPrivateScratchSizeInBytes; + perThreadPrivateScratchSize = requiredPerThreadPrivateScratchSizeAlignedUp; + scratchSurfaceDirty = true; + AllocationProperties properties{this->rootDeviceIndex, true, privateScratchSizeBytes, GraphicsAllocation::AllocationType::PRIVATE_SURFACE, multiTileCapable, false, osContext.getDeviceBitfield()}; + privateScratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); + } + } +} + +void ScratchSpaceControllerXeHPPlus::programHeaps(HeapContainer &heapContainer, + uint32_t scratchSlot, + uint32_t requiredPerThreadScratchSize, + uint32_t requiredPerThreadPrivateScratchSize, + uint32_t currentTaskCount, + OsContext &osContext, + bool &stateBaseAddressDirty, + bool &vfeStateDirty) { + sshOffset = scratchSlot; + updateSlots = false; + setRequiredScratchSpace(heapContainer[0]->getUnderlyingBuffer(), sshOffset, requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty); + + for (uint32_t i = 1; i < heapContainer.size(); ++i) { + surfaceStateHeap = static_cast(heapContainer[i]->getUnderlyingBuffer()); + updateSlots = false; + programSurfaceState(); + } + + updateSlots = true; +} + +} // namespace NEO diff --git a/shared/source/command_stream/scratch_space_controller_xehp_plus.h b/shared/source/command_stream/scratch_space_controller_xehp_plus.h new file mode 100644 index 0000000000..99cf13c565 --- /dev/null +++ b/shared/source/command_stream/scratch_space_controller_xehp_plus.h @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/command_stream/scratch_space_controller.h" + +#include +#include + +namespace NEO { + +class ScratchSpaceControllerXeHPPlus : public ScratchSpaceController { + public: + 
ScratchSpaceControllerXeHPPlus(uint32_t rootDeviceIndex, + ExecutionEnvironment &environment, + InternalAllocationStorage &allocationStorage); + void setNewSshPtr(void *newSsh, bool &cfeDirty, bool changeId); + + void setRequiredScratchSpace(void *sshBaseAddress, + uint32_t scratchSlot, + uint32_t requiredPerThreadScratchSize, + uint32_t requiredPerThreadPrivateScratchSize, + uint32_t currentTaskCount, + OsContext &osContext, + bool &stateBaseAddressDirty, + bool &vfeStateDirty) override; + + uint64_t calculateNewGSH() override; + uint64_t getScratchPatchAddress() override; + + void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) override; + + void programHeaps(HeapContainer &heapContainer, + uint32_t scratchSlot, + uint32_t requiredPerThreadScratchSize, + uint32_t requiredPerThreadPrivateScratchSize, + uint32_t currentTaskCount, + OsContext &osContext, + bool &stateBaseAddressDirty, + bool &vfeStateDirty) override; + void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, + uint32_t requiredPerThreadScratchSize, + uint32_t requiredPerThreadPrivateScratchSize, + uint32_t currentTaskCount, + OsContext &osContext, + bool &stateBaseAddressDirty, + bool &vfeStateDirty, + NEO::CommandStreamReceiver *csr) override; + + protected: + MOCKABLE_VIRTUAL void programSurfaceState(); + MOCKABLE_VIRTUAL void programSurfaceStateAtPtr(void *surfaceStateForScratchAllocation); + MOCKABLE_VIRTUAL void prepareScratchAllocation(uint32_t requiredPerThreadScratchSize, + uint32_t requiredPerThreadPrivateScratchSize, + uint32_t currentTaskCount, + OsContext &osContext, + bool &stateBaseAddressDirty, + bool &scratchSurfaceDirty, + bool &vfeStateDirty); + size_t getOffsetToSurfaceState(uint32_t requiredSlotCount) const; + + bool updateSlots = true; + uint32_t stateSlotsCount = 16; + static const uint32_t scratchType = 6; + bool privateScratchSpaceSupported = true; + + char *surfaceStateHeap = nullptr; + size_t singleSurfaceStateSize = 0; + uint32_t 
slotId = 0; + uint32_t perThreadScratchSize = 0; + uint32_t perThreadPrivateScratchSize = 0; + uint32_t sshOffset = 0; + SurfaceStateInHeapInfo bindlessSS = {}; +}; + +} // namespace NEO diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index eb0171e76d..590531cf42 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -58,9 +58,20 @@ DECLARE_DEBUG_VARIABLE(bool, ZebinAppendElws, false, "Append crossthread data wi DECLARE_DEBUG_VARIABLE(bool, ZebinIgnoreIcbeVersion, false, "Ignore IGC\'s ICBE version") DECLARE_DEBUG_VARIABLE(bool, UseExternalAllocatorForSshAndDsh, false, "Use 32 bit external Allocator for ssh and dsh in Level Zero") DECLARE_DEBUG_VARIABLE(bool, UseBindlessDebugSip, false, "Use bindless debug system routine") +DECLARE_DEBUG_VARIABLE(bool, CleanStateInPreamble, false, "Ensures clean state in preamble.") +DECLARE_DEBUG_VARIABLE(bool, EnableStatelessCompression, false, "Enable E2EC in SBA for all stateless accesses") +DECLARE_DEBUG_VARIABLE(bool, EnableStatelessCompressionWithUnifiedMemory, false, "Enable stateless compression with unified memory") +DECLARE_DEBUG_VARIABLE(bool, UseClearColorAllocationForBlitter, false, "false: disable (default), true: enable. If set then clear color allocation for blitter operations is created.") +DECLARE_DEBUG_VARIABLE(bool, EnableMultiGpuAtomicsOptimization, true, "Enable multi GPU atomics optimization") +DECLARE_DEBUG_VARIABLE(bool, DisableCachingForHeaps, false, "When set driver will not cache heaps in L3/L1.") +DECLARE_DEBUG_VARIABLE(bool, GlobalSequencerFlushOnCopyEngine, false, "false: disable (default), true: enable. 
If set then global sequencer flash is added for blitter") +DECLARE_DEBUG_VARIABLE(bool, UseImmDataWriteModeOnPostSyncOperation, false, "Use IMM data write mode as post sync operation in Compute Walker") DECLARE_DEBUG_VARIABLE(bool, DisableTimestampEvents, false, "Timestamp info will not be reported and events will only perform regular synchronization functions") +DECLARE_DEBUG_VARIABLE(bool, EnableResourceTags, false, "Enable resource tagging in GMM") DECLARE_DEBUG_VARIABLE(std::string, ForceDeviceId, std::string("unk"), "DeviceId selected for testing") DECLARE_DEBUG_VARIABLE(std::string, LoadBinarySipFromFile, std::string("unk"), "Select binary file to load SIP kernel raw binary") +DECLARE_DEBUG_VARIABLE(int64_t, OverrideMultiStoragePlacement, -1, "-1: disable, 0+: tile mask, each bit corresponds to tile") +DECLARE_DEBUG_VARIABLE(int64_t, ForceCompressionDisabledForCompressedBlitCopies, -1, "-1: default, 0: disabled, 1: enabled. If compression is required, set AUX_CCS_E, but force CompressionEnable filed. 
0 should result in uncompressed read/write") DECLARE_DEBUG_VARIABLE(int32_t, ForceL1Caching, -1, "-1: default, 0: disable, 1: enable, When set to true driver will program L1 cache policy for surface state and stateless accessess") DECLARE_DEBUG_VARIABLE(int32_t, ForceAuxTranslationEnabled, -1, "-1: default, 0: disabled, 1: enabled") DECLARE_DEBUG_VARIABLE(int32_t, SchedulerSimulationReturnInstance, 0, "prints execution model related debug information") @@ -100,6 +111,42 @@ DECLARE_DEBUG_VARIABLE(int32_t, DebuggerLogBitmask, 0, "0: logs disabled, 1 - IN DECLARE_DEBUG_VARIABLE(int32_t, DebuggerOptDisable, -1, "-1: default from debugger query, 0: do not add opt-disable, 1: add opt-disable") DECLARE_DEBUG_VARIABLE(int32_t, DebugApiUsed, 0, "0: default L0 Debug API not used, 1: L0 Debug API used") DECLARE_DEBUG_VARIABLE(int32_t, OverrideCsrAllocationSize, -1, "-1: default, >0: use value for size of CSR allocation") +DECLARE_DEBUG_VARIABLE(int32_t, CFEComputeOverdispatchDisable, -1, "Set Compute Overdispatch Disable field in CFE_STATE, -1: do not set.") +DECLARE_DEBUG_VARIABLE(int32_t, CFEWeightedDispatchModeDisable, -1, "Set Weighted Dispatch Mode Disable field in CFE_STATE on XEHP, -1: do not set.") +DECLARE_DEBUG_VARIABLE(int32_t, CFESingleSliceDispatchCCSMode, -1, "Set Single Slice Dispatch CCS Mode in CFE_STATE on XEHP, -1 - do not set") +DECLARE_DEBUG_VARIABLE(int32_t, CFENumberOfWalkers, -1, "Set Number of Walkers in CFE_STATE on XEHP, -1 - do not set") +DECLARE_DEBUG_VARIABLE(int32_t, CFEMaximumNumberOfThreads, -1, "Set Maximum Number of Threads in CFE_STATE on XEHP, -1 - do not set") +DECLARE_DEBUG_VARIABLE(int32_t, CFEOverDispatchControl, -1, "Set Over Dispatch Control in CFE_STATE on XEHP, -1 - do not set") +DECLARE_DEBUG_VARIABLE(int32_t, CFELargeGRFThreadAdjustDisable, -1, "Set Large GRF thread adjust Disable field in CFE_STATE, -1 - do not set") +DECLARE_DEBUG_VARIABLE(int32_t, SynchronizeWalkerInWparidMode, -1, "-1: default, 0: do not synchronize 1: 
synchronize all tiles prior to doing work distrubution") +DECLARE_DEBUG_VARIABLE(int32_t, EnableWalkerPartition, -1, "-1: default, 0: disable, 1: enable, Enables Walker Partitioning via WPARID.") +DECLARE_DEBUG_VARIABLE(int32_t, OverrideNumComputeUnitsForScratch, -1, "Override number of compute units used for scratch size calculation") +DECLARE_DEBUG_VARIABLE(int32_t, ForceWorkgroupSize1x1x1, -1, "-1: default, 0: disable, 1: enable, force workgroup size 1x1x1 in builtins") +DECLARE_DEBUG_VARIABLE(int32_t, ForceThreadGroupDispatchSize, -1, "Set ThreadGroupDispatchSize in INTERFACE_DESCRIPTOR_DATA, -1 - default, 0 - TG size 8, 1 - TG size 4, 2 - TG size 2, 3 - Reserved") +DECLARE_DEBUG_VARIABLE(int32_t, ForceStatelessL1CachingPolicy, -1, "-1: default, >=0 : program value for stateless L1 caching") +DECLARE_DEBUG_VARIABLE(int32_t, ForceMemoryBankIndexOverride, -1, "-1: default, 0: disable, 1:enable, Force index=1 of memory bank for XEHP") +DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalSynchronizeWithSemaphores, -1, "Experimental implementation: 1: Emit Semaphores waiting after Walker completion in WPARID mode 0: do not emit semaphores after Walker") +DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalForceCrossAtomicSynchronization, -1, "Experimental implementation: 1: Cross Tile Atomic Synchronization present 0: Cross tile atomic synchronization disabled") +DECLARE_DEBUG_VARIABLE(int32_t, EnablePrivateScratchSlot1, -1, "-1: default, 0: disable, 1: enable Allows using private scratch space") +DECLARE_DEBUG_VARIABLE(int32_t, DisablePipeControlPrecedingPostSyncCommand, -1, "-1 default - disabled adding PIPE_CONTROL, 0 - disabled adding PIPE_CONTROL, 1 - enabled adding PIPE_CONTROL") +DECLARE_DEBUG_VARIABLE(int32_t, UseCachingPolicyForIndirectObjectHeap, -1, "Use selected caching policy for IOH, -1 - default, 0 - Uncached, 1 - L3 Caching, 2 - L1 Caching") +DECLARE_DEBUG_VARIABLE(int32_t, MultiTileIsaPlacement, -1, "Place ISA allocation on multi tiles, -1 - default, 0 - disabled, 
1 - enabled") +DECLARE_DEBUG_VARIABLE(int32_t, FormatForStatelessCompressionWithUnifiedMemory, 0xF, "Format for stateless compression with unified memory") +DECLARE_DEBUG_VARIABLE(int32_t, ForceMultiGpuPartialWritesInComputeMode, -1, "-1: default - 0 for multiOsContext capable, 0: program value 0 in MultiGpuPartialWrites bit in STATE_COMPUTE_MODE, 1: program value 1 in MultiGpuPartialWrites bit in STATE_COMPUTE_MODE,") +DECLARE_DEBUG_VARIABLE(int32_t, ForceMultiGpuPartialWrites, -1, "-1: default - 0 for multiOsContext capable, 0: program value 0 in MultiGpuPartialWrites controls 1: program value 1 in MultiGpuPartialWrites controls") +DECLARE_DEBUG_VARIABLE(int32_t, ForceMultiGpuAtomicsInComputeMode, -1, "-1: default - 0 for multiOsContext capable, 0: program value 0 in MultiGpuAtomics bit in STATE_COMPUTE_MODE, 1: program value 1 in MultiGpuAtomics bit in STATE_COMPUTE_MODE") +DECLARE_DEBUG_VARIABLE(int32_t, ForceMultiGpuAtomics, -1, "-1: default - 0 for multiOsContext capable, 0: program value 0 in MultiGpuAtomics controls 1: program value 1 in MultiGpuAtomics controls") +DECLARE_DEBUG_VARIABLE(int32_t, ForceBufferCompressionFormat, -1, "-1: default, >0: Format value") +DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalSetWalkerPartitionCount, 0, "Experimental implementation: Set number of COMPUTE_WALKERs for a given Partition Type, 0 - do not set the feature.") +DECLARE_DEBUG_VARIABLE(int32_t, EnableHwGenerationLocalIds, -1, "-1: default, 0: disable, 1: enable : Enables generation of local ids on HW") +DECLARE_DEBUG_VARIABLE(int32_t, WalkerPartitionPreferHighestDimension, -1, "-1: default, 0: prefer biggest dimension, 1: prefer Z over Y over X if they divide partition count evenly") +DECLARE_DEBUG_VARIABLE(int32_t, SetMinimalPartitionSize, -1, "-1 default value set to 512 workgroups, 0 - disabled, >0 - minimal partition size in workgroups (should be power of 2)") +DECLARE_DEBUG_VARIABLE(int32_t, OverrideBlitterTargetMemory, -1, "-1:default 0: overwrites to System 1: 
overwrites to Local") +DECLARE_DEBUG_VARIABLE(int32_t, OverrideBlitterMocs, -1, "-1: default, >=0 SetGivenMocsInBlitterTransfers") +DECLARE_DEBUG_VARIABLE(int32_t, ExperimentalSetWalkerPartitionType, -1, "Experimental implementation: Set COMPUTE_WALKER Partition Type. Valid values for types from 1 to 3") +DECLARE_DEBUG_VARIABLE(int32_t, OverridePostSyncMocs, -1, "-1: default, >=0 Override post sync mocs with value") +DECLARE_DEBUG_VARIABLE(int32_t, EnableImmediateVmBindExt, -1, "Use immediate bind extension to a new residency model on Linux (requires kernel support), -1: default (enabled whith direct submission), 0: disabled, 1: enabled") +DECLARE_DEBUG_VARIABLE(int32_t, ForceExecutionTile, -1, "-1: default, 0+: given tile is choosen as submission, must be used with EnableWalkerPartition = 0.") DECLARE_DEBUG_VARIABLE(int32_t, OverrideTimestampPacketSize, -1, "-1: default, >0: size in bytes. 4 and 8 supported for experiments") DECLARE_DEBUG_VARIABLE(int32_t, OverrideMaxWorkGroupCount, -1, "-1: default, >0: Max WG size") diff --git a/shared/source/gen_common/aub_mapper.h b/shared/source/gen_common/aub_mapper.h index 9423073074..bb9b3d0d4c 100644 --- a/shared/source/gen_common/aub_mapper.h +++ b/shared/source/gen_common/aub_mapper.h @@ -18,3 +18,6 @@ #ifdef SUPPORT_GEN12LP #include "shared/source/gen12lp/aub_mapper.h" #endif +#ifdef SUPPORT_XE_HP_CORE +#include "shared/source/xe_hp_core/aub_mapper.h" +#endif diff --git a/shared/source/generated/xe_hp_core/hw_cmds_generated_xe_hp_core.inl b/shared/source/generated/xe_hp_core/hw_cmds_generated_xe_hp_core.inl new file mode 100644 index 0000000000..95cafd9507 --- /dev/null +++ b/shared/source/generated/xe_hp_core/hw_cmds_generated_xe_hp_core.inl @@ -0,0 +1,7440 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma pack(1) + +typedef struct tagBINDING_TABLE_STATE { + union tagTheStructure { + struct tagCommon { + uint32_t Reserved_0 : BITFIELD_RANGE(0, 5); + uint32_t 
SurfaceStatePointer : BITFIELD_RANGE(6, 31); + } Common; + uint32_t RawData[1]; + } TheStructure; + typedef enum tagPATCH_CONSTANTS { + SURFACESTATEPOINTER_BYTEOFFSET = 0x0, + SURFACESTATEPOINTER_INDEX = 0x0, + } PATCH_CONSTANTS; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + } + static tagBINDING_TABLE_STATE sInit(void) { + BINDING_TABLE_STATE state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 1); + return TheStructure.RawData[index]; + } + inline const uint32_t &getRawData(const uint32_t index) const { + DEBUG_BREAK_IF(index >= 1); + return TheStructure.RawData[index]; + } + typedef enum tagSURFACESTATEPOINTER { + SURFACESTATEPOINTER_BIT_SHIFT = 0x6, + SURFACESTATEPOINTER_ALIGN_SIZE = 0x40, + } SURFACESTATEPOINTER; + inline void setSurfaceStatePointer(const uint64_t value) { + DEBUG_BREAK_IF(value >= 0x100000000); + TheStructure.Common.SurfaceStatePointer = (uint32_t)value >> SURFACESTATEPOINTER_BIT_SHIFT; + } + inline uint32_t getSurfaceStatePointer(void) const { + return (TheStructure.Common.SurfaceStatePointer << SURFACESTATEPOINTER_BIT_SHIFT); + } +} BINDING_TABLE_STATE; +STATIC_ASSERT(4 == sizeof(BINDING_TABLE_STATE)); + +typedef struct tagMEDIA_SURFACE_STATE { + union tagTheStructure { + struct tagCommon { + uint32_t Reserved_0 : BITFIELD_RANGE(0, 29); + uint32_t Rotation : BITFIELD_RANGE(30, 31); + uint32_t Cr_VCb_UPixelOffsetVDirection : BITFIELD_RANGE(0, 1); + uint32_t PictureStructure : BITFIELD_RANGE(2, 3); + uint32_t Width : BITFIELD_RANGE(4, 17); + uint32_t Height : BITFIELD_RANGE(18, 31); + uint32_t TileMode : BITFIELD_RANGE(0, 1); + uint32_t HalfPitchForChroma : BITFIELD_RANGE(2, 2); + uint32_t SurfacePitch : BITFIELD_RANGE(3, 20); + uint32_t AddressControl : BITFIELD_RANGE(21, 21); + uint32_t MemoryCompressionEnable : BITFIELD_RANGE(22, 22); + uint32_t MemoryCompressionMode : BITFIELD_RANGE(23, 23); + uint32_t Cr_VCb_UPixelOffsetVDirectionMsb : 
BITFIELD_RANGE(24, 24); + uint32_t Cr_VCb_UPixelOffsetUDirection : BITFIELD_RANGE(25, 25); + uint32_t InterleaveChroma : BITFIELD_RANGE(26, 26); + uint32_t SurfaceFormat : BITFIELD_RANGE(27, 31); + uint32_t YOffsetForU_Cb : BITFIELD_RANGE(0, 13); + uint32_t Reserved_110 : BITFIELD_RANGE(14, 15); + uint32_t XOffsetForU_Cb : BITFIELD_RANGE(16, 29); + uint32_t Reserved_126 : BITFIELD_RANGE(30, 31); + uint32_t Reserved_128; + uint32_t SurfaceMemoryObjectControlState_Reserved : BITFIELD_RANGE(0, 0); + uint32_t SurfaceMemoryObjectControlState_IndexToMocsTables : BITFIELD_RANGE(1, 6); + uint32_t Reserved_167 : BITFIELD_RANGE(7, 17); + uint32_t TiledResourceMode : BITFIELD_RANGE(18, 19); + uint32_t Depth : BITFIELD_RANGE(20, 23); + uint32_t Reserved_184 : BITFIELD_RANGE(24, 29); + uint32_t VerticalLineStrideOffset : BITFIELD_RANGE(30, 30); + uint32_t VerticalLineStride : BITFIELD_RANGE(31, 31); + uint32_t SurfaceBaseAddressLow; + uint32_t SurfaceBaseAddressHigh : BITFIELD_RANGE(0, 15); + uint32_t Reserved_240 : BITFIELD_RANGE(16, 31); + } Common; + struct tagSurfaceFormatIsNotOneOfPlanarFormats { + uint32_t Reserved_0; + uint32_t Reserved_32; + uint32_t Reserved_64; + uint32_t Reserved_96; + uint32_t Reserved_128; + uint32_t Reserved_160; + uint32_t Reserved_192; + uint32_t Reserved_224; + } SurfaceFormatIsNotOneOfPlanarFormats; + struct tagSurfaceFormatIsOneOfPlanarFormats { + uint32_t Reserved_0 : BITFIELD_RANGE(0, 15); + uint32_t YOffset : BITFIELD_RANGE(16, 19); + uint32_t XOffset : BITFIELD_RANGE(20, 26); + uint32_t Reserved_27 : BITFIELD_RANGE(27, 31); + uint32_t Reserved_32; + uint32_t Reserved_64; + uint32_t Reserved_96; + uint32_t Reserved_128; + uint32_t Reserved_160; + uint32_t Reserved_192; + uint32_t Reserved_224; + } SurfaceFormatIsOneOfPlanarFormats; + struct tag_SurfaceFormatIsOneOfPlanarAnd_InterleaveChromaIs0 { + uint32_t Reserved_0; + uint32_t Reserved_32; + uint32_t Reserved_64; + uint32_t Reserved_96; + uint32_t YOffsetForV_Cr : BITFIELD_RANGE(0, 14); 
+ uint32_t Reserved_143 : BITFIELD_RANGE(15, 15); + uint32_t XOffsetForV_Cr : BITFIELD_RANGE(16, 29); + uint32_t Reserved_158 : BITFIELD_RANGE(30, 31); + uint32_t Reserved_160; + uint32_t Reserved_192; + uint32_t Reserved_224; + } _SurfaceFormatIsOneOfPlanarAnd_InterleaveChromaIs0; + uint32_t RawData[8]; + } TheStructure; + typedef enum tagROTATION { + ROTATION_NO_ROTATION_OR_0_DEGREE = 0x0, + ROTATION_90_DEGREE_ROTATION = 0x1, + ROTATION_180_DEGREE_ROTATION = 0x2, + ROTATION_270_DEGREE_ROTATION = 0x3, + } ROTATION; + typedef enum tagPICTURE_STRUCTURE { + PICTURE_STRUCTURE_FRAME_PICTURE = 0x0, + PICTURE_STRUCTURE_TOP_FIELD_PICTURE = 0x1, + PICTURE_STRUCTURE_BOTTOM_FIELD_PICTURE = 0x2, + PICTURE_STRUCTURE_INVALID_NOT_ALLOWED = 0x3, + } PICTURE_STRUCTURE; + typedef enum tagTILE_MODE { + TILE_MODE_TILEMODE_LINEAR = 0x0, + TILE_MODE_TILEMODE_XMAJOR = 0x2, + TILE_MODE_TILEMODE_YMAJOR = 0x3, + } TILE_MODE; + typedef enum tagADDRESS_CONTROL { + ADDRESS_CONTROL_CLAMP = 0x0, + ADDRESS_CONTROL_MIRROR = 0x1, + } ADDRESS_CONTROL; + typedef enum tagMEMORY_COMPRESSION_MODE { + MEMORY_COMPRESSION_MODE_HORIZONTAL_COMPRESSION_MODE = 0x0, + MEMORY_COMPRESSION_MODE_VERTICAL_COMPRESSION_MODE = 0x1, + } MEMORY_COMPRESSION_MODE; + typedef enum tagSURFACE_FORMAT { + SURFACE_FORMAT_YCRCB_NORMAL = 0x0, + SURFACE_FORMAT_YCRCB_SWAPUVY = 0x1, + SURFACE_FORMAT_YCRCB_SWAPUV = 0x2, + SURFACE_FORMAT_YCRCB_SWAPY = 0x3, + SURFACE_FORMAT_PLANAR_420_8 = 0x4, + SURFACE_FORMAT_Y8_UNORM_VA = 0x5, + SURFACE_FORMAT_Y16_SNORM = 0x6, + SURFACE_FORMAT_Y16_UNORM_VA = 0x7, + SURFACE_FORMAT_R10G10B10A2_UNORM = 0x8, + SURFACE_FORMAT_R8G8B8A8_UNORM = 0x9, + SURFACE_FORMAT_R8B8_UNORM_CRCB = 0xa, + SURFACE_FORMAT_R8_UNORM_CR_CB = 0xb, + SURFACE_FORMAT_Y8_UNORM = 0xc, + SURFACE_FORMAT_A8Y8U8V8_UNORM = 0xd, + SURFACE_FORMAT_B8G8R8A8_UNORM = 0xe, + SURFACE_FORMAT_R16G16B16A16 = 0xf, + SURFACE_FORMAT_Y1_UNORM = 0x10, + SURFACE_FORMAT_Y32_UNORM = 0x11, + SURFACE_FORMAT_PLANAR_422_8 = 0x12, + SURFACE_FORMAT_FM_STRBUF_Y1 
= 0x13, + SURFACE_FORMAT_FM_STRBUF_Y8 = 0x14, + SURFACE_FORMAT_FM_STRBUF_Y16 = 0x15, + SURFACE_FORMAT_FM_STRBUF_Y32 = 0x16, + SURFACE_FORMAT_PLANAR_420_16 = 0x17, + SURFACE_FORMAT_R16B16_UNORM_CRCB = 0x18, + SURFACE_FORMAT_R16_UNORM_CR_CB = 0x19, + SURFACE_FORMAT_Y16_UNORM = 0x1a, + } SURFACE_FORMAT; + typedef enum tagSURFACE_MEMORY_OBJECT_CONTROL_STATE { + SURFACE_MEMORY_OBJECT_CONTROL_STATE_DEFAULTVAUEDESC = 0x0, + } SURFACE_MEMORY_OBJECT_CONTROL_STATE; + typedef enum tagTILED_RESOURCE_MODE { + TILED_RESOURCE_MODE_TRMODE_NONE = 0x0, + TILED_RESOURCE_MODE_TRMODE_TILEYF = 0x1, + TILED_RESOURCE_MODE_TRMODE_TILEYS = 0x2, + } TILED_RESOURCE_MODE; + typedef enum tagPATCH_CONSTANTS { + SURFACEBASEADDRESS_BYTEOFFSET = 0x18, + SURFACEBASEADDRESS_INDEX = 0x6, + SURFACEBASEADDRESSHIGH_BYTEOFFSET = 0x1c, + SURFACEBASEADDRESSHIGH_INDEX = 0x7, + } PATCH_CONSTANTS; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.Rotation = ROTATION_NO_ROTATION_OR_0_DEGREE; + TheStructure.Common.PictureStructure = PICTURE_STRUCTURE_FRAME_PICTURE; + TheStructure.Common.TileMode = TILE_MODE_TILEMODE_LINEAR; + TheStructure.Common.AddressControl = ADDRESS_CONTROL_CLAMP; + TheStructure.Common.MemoryCompressionMode = MEMORY_COMPRESSION_MODE_HORIZONTAL_COMPRESSION_MODE; + TheStructure.Common.SurfaceFormat = SURFACE_FORMAT_YCRCB_NORMAL; + TheStructure.Common.TiledResourceMode = TILED_RESOURCE_MODE_TRMODE_NONE; + } + static tagMEDIA_SURFACE_STATE sInit(void) { + MEDIA_SURFACE_STATE state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 8); + return TheStructure.RawData[index]; + } + inline void setRotation(const ROTATION value) { + TheStructure.Common.Rotation = value; + } + inline ROTATION getRotation(void) const { + return static_cast(TheStructure.Common.Rotation); + } + inline void setCrVCbUPixelOffsetVDirection(const uint32_t value) { + 
TheStructure.Common.Cr_VCb_UPixelOffsetVDirection = value; + } + inline uint32_t getCrVCbUPixelOffsetVDirection(void) const { + return (TheStructure.Common.Cr_VCb_UPixelOffsetVDirection); + } + inline void setPictureStructure(const PICTURE_STRUCTURE value) { + TheStructure.Common.PictureStructure = value; + } + inline PICTURE_STRUCTURE getPictureStructure(void) const { + return static_cast(TheStructure.Common.PictureStructure); + } + inline void setWidth(const uint32_t value) { + TheStructure.Common.Width = value - 1; + } + inline uint32_t getWidth(void) const { + return (TheStructure.Common.Width + 1); + } + inline void setHeight(const uint32_t value) { + TheStructure.Common.Height = value - 1; + } + inline uint32_t getHeight(void) const { + return (TheStructure.Common.Height + 1); + } + inline void setTileMode(const TILE_MODE value) { + TheStructure.Common.TileMode = value; + } + inline TILE_MODE getTileMode(void) const { + return static_cast(TheStructure.Common.TileMode); + } + inline void setHalfPitchForChroma(const bool value) { + TheStructure.Common.HalfPitchForChroma = value; + } + inline bool getHalfPitchForChroma(void) const { + return (TheStructure.Common.HalfPitchForChroma); + } + inline void setSurfacePitch(const uint32_t value) { + TheStructure.Common.SurfacePitch = value - 1; + } + inline uint32_t getSurfacePitch(void) const { + return (TheStructure.Common.SurfacePitch + 1); + } + inline void setAddressControl(const ADDRESS_CONTROL value) { + TheStructure.Common.AddressControl = value; + } + inline ADDRESS_CONTROL getAddressControl(void) const { + return static_cast(TheStructure.Common.AddressControl); + } + inline void setMemoryCompressionEnable(const bool value) { + TheStructure.Common.MemoryCompressionEnable = value; + } + inline bool getMemoryCompressionEnable(void) const { + return (TheStructure.Common.MemoryCompressionEnable); + } + inline void setMemoryCompressionMode(const MEMORY_COMPRESSION_MODE value) { + 
TheStructure.Common.MemoryCompressionMode = value; + } + inline MEMORY_COMPRESSION_MODE getMemoryCompressionMode(void) const { + return static_cast(TheStructure.Common.MemoryCompressionMode); + } + inline void setCrVCbUPixelOffsetVDirectionMsb(const uint32_t value) { + TheStructure.Common.Cr_VCb_UPixelOffsetVDirectionMsb = value; + } + inline uint32_t getCrVCbUPixelOffsetVDirectionMsb(void) const { + return (TheStructure.Common.Cr_VCb_UPixelOffsetVDirectionMsb); + } + inline void setCrVCbUPixelOffsetUDirection(const uint32_t value) { + TheStructure.Common.Cr_VCb_UPixelOffsetUDirection = value; + } + inline uint32_t getCrVCbUPixelOffsetUDirection(void) const { + return (TheStructure.Common.Cr_VCb_UPixelOffsetUDirection); + } + inline void setInterleaveChroma(const bool value) { + TheStructure.Common.InterleaveChroma = value; + } + inline bool getInterleaveChroma(void) const { + return (TheStructure.Common.InterleaveChroma); + } + inline void setSurfaceFormat(const SURFACE_FORMAT value) { + TheStructure.Common.SurfaceFormat = value; + } + inline SURFACE_FORMAT getSurfaceFormat(void) const { + return static_cast(TheStructure.Common.SurfaceFormat); + } + inline void setYOffsetForUCb(const uint32_t value) { + TheStructure.Common.YOffsetForU_Cb = value; + } + inline uint32_t getYOffsetForUCb(void) const { + return (TheStructure.Common.YOffsetForU_Cb); + } + inline void setXOffsetForUCb(const uint32_t value) { + TheStructure.Common.XOffsetForU_Cb = value; + } + inline uint32_t getXOffsetForUCb(void) const { + return (TheStructure.Common.XOffsetForU_Cb); + } + inline void setSurfaceMemoryObjectControlStateReserved(const uint32_t value) { + TheStructure.Common.SurfaceMemoryObjectControlState_Reserved = value; + } + inline uint32_t getSurfaceMemoryObjectControlStateReserved(void) const { + return (TheStructure.Common.SurfaceMemoryObjectControlState_Reserved); + } + inline void setSurfaceMemoryObjectControlStateIndexToMocsTables(const uint32_t value) { + 
TheStructure.Common.SurfaceMemoryObjectControlState_IndexToMocsTables = value >> 1; + } + inline uint32_t getSurfaceMemoryObjectControlStateIndexToMocsTables(void) const { + return (TheStructure.Common.SurfaceMemoryObjectControlState_IndexToMocsTables << 1); + } + inline void setTiledResourceMode(const TILED_RESOURCE_MODE value) { + TheStructure.Common.TiledResourceMode = value; + } + inline TILED_RESOURCE_MODE getTiledResourceMode(void) const { + return static_cast(TheStructure.Common.TiledResourceMode); + } + inline void setDepth(const uint32_t value) { + TheStructure.Common.Depth = value; + } + inline uint32_t getDepth(void) const { + return (TheStructure.Common.Depth); + } + inline void setVerticalLineStrideOffset(const uint32_t value) { + TheStructure.Common.VerticalLineStrideOffset = value; + } + inline uint32_t getVerticalLineStrideOffset(void) const { + return (TheStructure.Common.VerticalLineStrideOffset); + } + inline void setVerticalLineStride(const uint32_t value) { + TheStructure.Common.VerticalLineStride = value; + } + inline uint32_t getVerticalLineStride(void) const { + return (TheStructure.Common.VerticalLineStride); + } + inline void setSurfaceBaseAddress(const uint64_t value) { + TheStructure.Common.SurfaceBaseAddressLow = static_cast(value & 0xffffffff); + TheStructure.Common.SurfaceBaseAddressHigh = (value >> 32) & 0xffffffff; + } + inline uint64_t getSurfaceBaseAddress(void) const { + return (TheStructure.Common.SurfaceBaseAddressLow | + static_cast(TheStructure.Common.SurfaceBaseAddressHigh) << 32); + } + inline void setSurfaceBaseAddressHigh(const uint32_t value) { + TheStructure.Common.SurfaceBaseAddressHigh = value; + } + inline uint32_t getSurfaceBaseAddressHigh(void) const { + return (TheStructure.Common.SurfaceBaseAddressHigh); + } + typedef enum tagYOFFSET { + YOFFSET_BIT_SHIFT = 0x2, + YOFFSET_ALIGN_SIZE = 0x4, + } YOFFSET; + inline void setYOffset(const uint32_t value) { + TheStructure.SurfaceFormatIsOneOfPlanarFormats.YOffset = 
value >> YOFFSET_BIT_SHIFT; + } + inline uint32_t getYOffset(void) const { + return (TheStructure.SurfaceFormatIsOneOfPlanarFormats.YOffset << YOFFSET_BIT_SHIFT); + } + typedef enum tagXOFFSET { + XOFFSET_BIT_SHIFT = 0x2, + XOFFSET_ALIGN_SIZE = 0x4, + } XOFFSET; + inline void setXOffset(const uint32_t value) { + TheStructure.SurfaceFormatIsOneOfPlanarFormats.XOffset = value >> XOFFSET_BIT_SHIFT; + } + inline uint32_t getXOffset(void) const { + return (TheStructure.SurfaceFormatIsOneOfPlanarFormats.XOffset << XOFFSET_BIT_SHIFT); + } + inline void setYOffsetForVCr(const uint32_t value) { + TheStructure._SurfaceFormatIsOneOfPlanarAnd_InterleaveChromaIs0.YOffsetForV_Cr = value; + } + inline uint32_t getYOffsetForVCr(void) const { + return (TheStructure._SurfaceFormatIsOneOfPlanarAnd_InterleaveChromaIs0.YOffsetForV_Cr); + } + inline void setXOffsetForVCr(const uint32_t value) { + TheStructure._SurfaceFormatIsOneOfPlanarAnd_InterleaveChromaIs0.XOffsetForV_Cr = value; + } + inline uint32_t getXOffsetForVCr(void) const { + return (TheStructure._SurfaceFormatIsOneOfPlanarAnd_InterleaveChromaIs0.XOffsetForV_Cr); + } +} MEDIA_SURFACE_STATE; +STATIC_ASSERT(32 == sizeof(MEDIA_SURFACE_STATE)); + +typedef struct tagMI_MATH { + union _DW0 { + struct _BitField { + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t Reserved : BITFIELD_RANGE(8, 22); + uint32_t InstructionOpcode : BITFIELD_RANGE(23, 28); + uint32_t InstructionType : BITFIELD_RANGE(29, 31); + } BitField; + uint32_t Value; + } DW0; + typedef enum tagMI_COMMAND_OPCODE { + MI_COMMAND_OPCODE_MI_MATH = 0x1A, + } MI_COMMAND_OPCODE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_MI_COMMAND = 0x0, + } COMMAND_TYPE; +} MI_MATH; + +typedef struct tagMI_MATH_ALU_INST_INLINE { + union _DW0 { + struct _BitField { + uint32_t Operand2 : BITFIELD_RANGE(0, 9); + uint32_t Operand1 : BITFIELD_RANGE(10, 19); + uint32_t ALUOpcode : BITFIELD_RANGE(20, 31); + } BitField; + uint32_t Value; + } DW0; +} MI_MATH_ALU_INST_INLINE; + +typedef 
struct tagPIPE_CONTROL { + union tagTheStructure { + struct tagCommon { + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t PredicateEnable : BITFIELD_RANGE(8, 8); + uint32_t HdcPipelineFlush : BITFIELD_RANGE(9, 9); + uint32_t Reserved_10 : BITFIELD_RANGE(10, 12); + uint32_t CompressionControlSurfaceCcsFlush : BITFIELD_RANGE(13, 13); + uint32_t WorkloadPartitionIdOffsetEnable : BITFIELD_RANGE(14, 14); + uint32_t Reserved_15 : BITFIELD_RANGE(15, 15); + uint32_t _3DCommandSubOpcode : BITFIELD_RANGE(16, 23); + uint32_t _3DCommandOpcode : BITFIELD_RANGE(24, 26); + uint32_t CommandSubtype : BITFIELD_RANGE(27, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + uint32_t DepthCacheFlushEnable : BITFIELD_RANGE(0, 0); + uint32_t StallAtPixelScoreboard : BITFIELD_RANGE(1, 1); + uint32_t StateCacheInvalidationEnable : BITFIELD_RANGE(2, 2); + uint32_t ConstantCacheInvalidationEnable : BITFIELD_RANGE(3, 3); + uint32_t VfCacheInvalidationEnable : BITFIELD_RANGE(4, 4); + uint32_t DcFlushEnable : BITFIELD_RANGE(5, 5); + uint32_t ProtectedMemoryApplicationId : BITFIELD_RANGE(6, 6); + uint32_t PipeControlFlushEnable : BITFIELD_RANGE(7, 7); + uint32_t NotifyEnable : BITFIELD_RANGE(8, 8); + uint32_t IndirectStatePointersDisable : BITFIELD_RANGE(9, 9); + uint32_t TextureCacheInvalidationEnable : BITFIELD_RANGE(10, 10); + uint32_t InstructionCacheInvalidateEnable : BITFIELD_RANGE(11, 11); + uint32_t RenderTargetCacheFlushEnable : BITFIELD_RANGE(12, 12); + uint32_t DepthStallEnable : BITFIELD_RANGE(13, 13); + uint32_t PostSyncOperation : BITFIELD_RANGE(14, 15); + uint32_t GenericMediaStateClear : BITFIELD_RANGE(16, 16); + uint32_t PssStallSyncEnable : BITFIELD_RANGE(17, 17); + uint32_t TlbInvalidate : BITFIELD_RANGE(18, 18); + uint32_t DepthStallSyncEnable : BITFIELD_RANGE(19, 19); + uint32_t CommandStreamerStallEnable : BITFIELD_RANGE(20, 20); + uint32_t StoreDataIndex : BITFIELD_RANGE(21, 21); + uint32_t ProtectedMemoryEnable : BITFIELD_RANGE(22, 22); + uint32_t 
LriPostSyncOperation : BITFIELD_RANGE(23, 23); + uint32_t DestinationAddressType : BITFIELD_RANGE(24, 24); + uint32_t AmfsFlushEnable : BITFIELD_RANGE(25, 25); + uint32_t FlushLlc : BITFIELD_RANGE(26, 26); + uint32_t ProtectedMemoryDisable : BITFIELD_RANGE(27, 27); + uint32_t TileCacheFlushEnable : BITFIELD_RANGE(28, 28); + uint32_t Reserved_61 : BITFIELD_RANGE(29, 29); + uint32_t L3FabricFlush : BITFIELD_RANGE(30, 30); + uint32_t TbimrForceBatchClosure : BITFIELD_RANGE(31, 31); + uint32_t Reserved_64 : BITFIELD_RANGE(0, 1); + uint32_t Address : BITFIELD_RANGE(2, 31); + uint32_t AddressHigh; + uint64_t ImmediateData; + } Common; + uint32_t RawData[6]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_DWORD_COUNT_N = 0x4, + } DWORD_LENGTH; + typedef enum tag_3D_COMMAND_SUB_OPCODE { + _3D_COMMAND_SUB_OPCODE_PIPE_CONTROL = 0x0, + } _3D_COMMAND_SUB_OPCODE; + typedef enum tag_3D_COMMAND_OPCODE { + _3D_COMMAND_OPCODE_PIPE_CONTROL = 0x2, + } _3D_COMMAND_OPCODE; + typedef enum tagCOMMAND_SUBTYPE { + COMMAND_SUBTYPE_GFXPIPE_3D = 0x3, + } COMMAND_SUBTYPE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_GFXPIPE = 0x3, + } COMMAND_TYPE; + typedef enum tagPOST_SYNC_OPERATION { + POST_SYNC_OPERATION_NO_WRITE = 0x0, + POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA = 0x1, + POST_SYNC_OPERATION_WRITE_PS_DEPTH_COUNT = 0x2, + POST_SYNC_OPERATION_WRITE_TIMESTAMP = 0x3, + } POST_SYNC_OPERATION; + typedef enum tagLRI_POST_SYNC_OPERATION { + LRI_POST_SYNC_OPERATION_NO_LRI_OPERATION = 0x0, + LRI_POST_SYNC_OPERATION_MMIO_WRITE_IMMEDIATE_DATA = 0x1, + } LRI_POST_SYNC_OPERATION; + typedef enum tagDESTINATION_ADDRESS_TYPE { + DESTINATION_ADDRESS_TYPE_PPGTT = 0x0, + DESTINATION_ADDRESS_TYPE_GGTT = 0x1, + } DESTINATION_ADDRESS_TYPE; + typedef enum tagTBIMR_FORCE_BATCH_CLOSURE { + TBIMR_FORCE_BATCH_CLOSURE_NO_BATCH_CLOSURE = 0x0, + TBIMR_FORCE_BATCH_CLOSURE_CLOSE_BATCH = 0x1, + } TBIMR_FORCE_BATCH_CLOSURE; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + 
TheStructure.Common.DwordLength = DWORD_LENGTH_DWORD_COUNT_N; + TheStructure.Common._3DCommandSubOpcode = _3D_COMMAND_SUB_OPCODE_PIPE_CONTROL; + TheStructure.Common._3DCommandOpcode = _3D_COMMAND_OPCODE_PIPE_CONTROL; + TheStructure.Common.CommandSubtype = COMMAND_SUBTYPE_GFXPIPE_3D; + TheStructure.Common.CommandType = COMMAND_TYPE_GFXPIPE; + TheStructure.Common.PostSyncOperation = POST_SYNC_OPERATION_NO_WRITE; + TheStructure.Common.LriPostSyncOperation = LRI_POST_SYNC_OPERATION_NO_LRI_OPERATION; + TheStructure.Common.DestinationAddressType = DESTINATION_ADDRESS_TYPE_PPGTT; + TheStructure.Common.TbimrForceBatchClosure = TBIMR_FORCE_BATCH_CLOSURE_NO_BATCH_CLOSURE; + TheStructure.Common.CommandStreamerStallEnable = 1; + } + static tagPIPE_CONTROL sInit(void) { + PIPE_CONTROL state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + UNRECOVERABLE_IF(index >= 6); + return TheStructure.RawData[index]; + } + inline void setPredicateEnable(const bool value) { + TheStructure.Common.PredicateEnable = value; + } + inline bool getPredicateEnable(void) const { + return TheStructure.Common.PredicateEnable; + } + inline void setHdcPipelineFlush(const bool value) { + TheStructure.Common.HdcPipelineFlush = value; + } + inline bool getHdcPipelineFlush(void) const { + return TheStructure.Common.HdcPipelineFlush; + } + inline void setCompressionControlSurfaceCcsFlush(const bool value) { + TheStructure.Common.CompressionControlSurfaceCcsFlush = value; + } + inline bool getCompressionControlSurfaceCcsFlush(void) const { + return TheStructure.Common.CompressionControlSurfaceCcsFlush; + } + inline void setWorkloadPartitionIdOffsetEnable(const bool value) { + TheStructure.Common.WorkloadPartitionIdOffsetEnable = value; + } + inline bool getWorkloadPartitionIdOffsetEnable(void) const { + return TheStructure.Common.WorkloadPartitionIdOffsetEnable; + } + inline void setDepthCacheFlushEnable(const bool value) { + 
TheStructure.Common.DepthCacheFlushEnable = value; + } + inline bool getDepthCacheFlushEnable(void) const { + return TheStructure.Common.DepthCacheFlushEnable; + } + inline void setStallAtPixelScoreboard(const bool value) { + TheStructure.Common.StallAtPixelScoreboard = value; + } + inline bool getStallAtPixelScoreboard(void) const { + return TheStructure.Common.StallAtPixelScoreboard; + } + inline void setStateCacheInvalidationEnable(const bool value) { + TheStructure.Common.StateCacheInvalidationEnable = value; + } + inline bool getStateCacheInvalidationEnable(void) const { + return TheStructure.Common.StateCacheInvalidationEnable; + } + inline void setConstantCacheInvalidationEnable(const bool value) { + TheStructure.Common.ConstantCacheInvalidationEnable = value; + } + inline bool getConstantCacheInvalidationEnable(void) const { + return TheStructure.Common.ConstantCacheInvalidationEnable; + } + inline void setVfCacheInvalidationEnable(const bool value) { + TheStructure.Common.VfCacheInvalidationEnable = value; + } + inline bool getVfCacheInvalidationEnable(void) const { + return TheStructure.Common.VfCacheInvalidationEnable; + } + inline void setDcFlushEnable(const bool value) { + TheStructure.Common.DcFlushEnable = value; + } + inline bool getDcFlushEnable(void) const { + return TheStructure.Common.DcFlushEnable; + } + inline void setProtectedMemoryApplicationId(const bool value) { + TheStructure.Common.ProtectedMemoryApplicationId = value; + } + inline bool getProtectedMemoryApplicationId(void) const { + return TheStructure.Common.ProtectedMemoryApplicationId; + } + inline void setPipeControlFlushEnable(const bool value) { + TheStructure.Common.PipeControlFlushEnable = value; + } + inline bool getPipeControlFlushEnable(void) const { + return TheStructure.Common.PipeControlFlushEnable; + } + inline void setNotifyEnable(const bool value) { + TheStructure.Common.NotifyEnable = value; + } + inline bool getNotifyEnable(void) const { + return 
TheStructure.Common.NotifyEnable; + } + inline void setIndirectStatePointersDisable(const bool value) { + TheStructure.Common.IndirectStatePointersDisable = value; + } + inline bool getIndirectStatePointersDisable(void) const { + return TheStructure.Common.IndirectStatePointersDisable; + } + inline void setTextureCacheInvalidationEnable(const bool value) { + TheStructure.Common.TextureCacheInvalidationEnable = value; + } + inline bool getTextureCacheInvalidationEnable(void) const { + return TheStructure.Common.TextureCacheInvalidationEnable; + } + inline void setInstructionCacheInvalidateEnable(const bool value) { + TheStructure.Common.InstructionCacheInvalidateEnable = value; + } + inline bool getInstructionCacheInvalidateEnable(void) const { + return TheStructure.Common.InstructionCacheInvalidateEnable; + } + inline void setRenderTargetCacheFlushEnable(const bool value) { + TheStructure.Common.RenderTargetCacheFlushEnable = value; + } + inline bool getRenderTargetCacheFlushEnable(void) const { + return TheStructure.Common.RenderTargetCacheFlushEnable; + } + inline void setDepthStallEnable(const bool value) { + TheStructure.Common.DepthStallEnable = value; + } + inline bool getDepthStallEnable(void) const { + return TheStructure.Common.DepthStallEnable; + } + inline void setPostSyncOperation(const POST_SYNC_OPERATION value) { + TheStructure.Common.PostSyncOperation = value; + } + inline POST_SYNC_OPERATION getPostSyncOperation(void) const { + return static_cast(TheStructure.Common.PostSyncOperation); + } + inline void setGenericMediaStateClear(const bool value) { + TheStructure.Common.GenericMediaStateClear = value; + } + inline bool getGenericMediaStateClear(void) const { + return TheStructure.Common.GenericMediaStateClear; + } + inline void setPssStallSyncEnable(const bool value) { + TheStructure.Common.PssStallSyncEnable = value; + } + inline bool getPssStallSyncEnable(void) const { + return TheStructure.Common.PssStallSyncEnable; + } + inline void 
setTlbInvalidate(const bool value) { + TheStructure.Common.TlbInvalidate = value; + } + inline bool getTlbInvalidate(void) const { + return TheStructure.Common.TlbInvalidate; + } + inline void setDepthStallSyncEnable(const bool value) { + TheStructure.Common.DepthStallSyncEnable = value; + } + inline bool getDepthStallSyncEnable(void) const { + return TheStructure.Common.DepthStallSyncEnable; + } + inline void setCommandStreamerStallEnable(const uint32_t value) { + TheStructure.Common.CommandStreamerStallEnable = value; + } + inline uint32_t getCommandStreamerStallEnable(void) const { + return TheStructure.Common.CommandStreamerStallEnable; + } + inline void setStoreDataIndex(const bool value) { + TheStructure.Common.StoreDataIndex = value; + } + inline bool getStoreDataIndex(void) const { + return TheStructure.Common.StoreDataIndex; + } + inline void setProtectedMemoryEnable(const bool value) { + TheStructure.Common.ProtectedMemoryEnable = value; + } + inline bool getProtectedMemoryEnable(void) const { + return TheStructure.Common.ProtectedMemoryEnable; + } + inline void setLriPostSyncOperation(const LRI_POST_SYNC_OPERATION value) { + TheStructure.Common.LriPostSyncOperation = value; + } + inline LRI_POST_SYNC_OPERATION getLriPostSyncOperation(void) const { + return static_cast(TheStructure.Common.LriPostSyncOperation); + } + inline void setDestinationAddressType(const DESTINATION_ADDRESS_TYPE value) { + TheStructure.Common.DestinationAddressType = value; + } + inline DESTINATION_ADDRESS_TYPE getDestinationAddressType(void) const { + return static_cast(TheStructure.Common.DestinationAddressType); + } + inline void setAmfsFlushEnable(const bool value) { + TheStructure.Common.AmfsFlushEnable = value; + } + inline bool getAmfsFlushEnable(void) const { + return TheStructure.Common.AmfsFlushEnable; + } + inline void setFlushLlc(const bool value) { + TheStructure.Common.FlushLlc = value; + } + inline bool getFlushLlc(void) const { + return TheStructure.Common.FlushLlc; 
+ } + inline void setProtectedMemoryDisable(const bool value) { + TheStructure.Common.ProtectedMemoryDisable = value; + } + inline bool getProtectedMemoryDisable(void) const { + return TheStructure.Common.ProtectedMemoryDisable; + } + inline void setTileCacheFlushEnable(const bool value) { + TheStructure.Common.TileCacheFlushEnable = value; + } + inline bool getTileCacheFlushEnable(void) const { + return TheStructure.Common.TileCacheFlushEnable; + } + inline void setL3FabricFlush(const bool value) { + TheStructure.Common.L3FabricFlush = value; + } + inline bool getL3FabricFlush(void) const { + return TheStructure.Common.L3FabricFlush; + } + inline void setTbimrForceBatchClosure(const TBIMR_FORCE_BATCH_CLOSURE value) { + TheStructure.Common.TbimrForceBatchClosure = value; + } + inline TBIMR_FORCE_BATCH_CLOSURE getTbimrForceBatchClosure(void) const { + return static_cast(TheStructure.Common.TbimrForceBatchClosure); + } + typedef enum tagADDRESS { + ADDRESS_BIT_SHIFT = 0x2, + ADDRESS_ALIGN_SIZE = 0x4, + } ADDRESS; + inline void setAddress(const uint32_t value) { + UNRECOVERABLE_IF(value > 0xfffffffc); + TheStructure.Common.Address = value >> ADDRESS_BIT_SHIFT; + } + inline uint32_t getAddress(void) const { + return TheStructure.Common.Address << ADDRESS_BIT_SHIFT; + } + inline void setAddressHigh(const uint32_t value) { + TheStructure.Common.AddressHigh = value; + } + inline uint32_t getAddressHigh(void) const { + return TheStructure.Common.AddressHigh; + } + inline void setImmediateData(const uint64_t value) { + TheStructure.Common.ImmediateData = value; + } + inline uint64_t getImmediateData(void) const { + return TheStructure.Common.ImmediateData; + } +} PIPE_CONTROL; +STATIC_ASSERT(24 == sizeof(PIPE_CONTROL)); + +typedef struct tagMI_ATOMIC { + union tagTheStructure { + struct tagCommon { + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t AtomicOpcode : BITFIELD_RANGE(8, 15); + uint32_t ReturnDataControl : BITFIELD_RANGE(16, 16); + uint32_t CsStall : 
BITFIELD_RANGE(17, 17); + uint32_t InlineData : BITFIELD_RANGE(18, 18); + uint32_t DataSize : BITFIELD_RANGE(19, 20); + uint32_t PostSyncOperation : BITFIELD_RANGE(21, 21); + uint32_t MemoryType : BITFIELD_RANGE(22, 22); + uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + uint32_t WorkloadPartitionIdOffsetEnable : BITFIELD_RANGE(0, 0); + uint32_t Reserved_33 : BITFIELD_RANGE(1, 1); + uint32_t MemoryAddress : BITFIELD_RANGE(2, 31); + uint32_t MemoryAddressHigh : BITFIELD_RANGE(0, 15); + uint32_t Reserved_80 : BITFIELD_RANGE(16, 31); + uint32_t Operand1DataDword0; + uint32_t Operand2DataDword0; + uint32_t Operand1DataDword1; + uint32_t Operand2DataDword1; + uint32_t Operand1DataDword2; + uint32_t Operand2DataDword2; + uint32_t Operand1DataDword3; + uint32_t Operand2DataDword3; + } Common; + uint32_t RawData[11]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_INLINE_DATA_0 = 0x1, + DWORD_LENGTH_INLINE_DATA_1 = 0x9, + } DWORD_LENGTH; + typedef enum tagDATA_SIZE { + DATA_SIZE_DWORD = 0x0, + DATA_SIZE_QWORD = 0x1, + DATA_SIZE_OCTWORD = 0x2, + } DATA_SIZE; + typedef enum tagPOST_SYNC_OPERATION { + POST_SYNC_OPERATION_NO_POST_SYNC_OPERATION = 0x0, + POST_SYNC_OPERATION_POST_SYNC_OPERATION = 0x1, + } POST_SYNC_OPERATION; + typedef enum tagMEMORY_TYPE { + MEMORY_TYPE_PER_PROCESS_GRAPHICS_ADDRESS = 0x0, + MEMORY_TYPE_GLOBAL_GRAPHICS_ADDRESS = 0x1, + } MEMORY_TYPE; + typedef enum tagMI_COMMAND_OPCODE { + MI_COMMAND_OPCODE_MI_ATOMIC = 0x2f, + } MI_COMMAND_OPCODE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_MI_COMMAND = 0x0, + } COMMAND_TYPE; + typedef enum tagATOMIC_OPCODES { + ATOMIC_4B_MOVE = 0x4, + ATOMIC_4B_INCREMENT = 0x5, + ATOMIC_4B_DECREMENT = 0x6, + ATOMIC_8B_MOVE = 0x24, + ATOMIC_8B_INCREMENT = 0x25, + ATOMIC_8B_DECREMENT = 0x26, + } ATOMIC_OPCODES; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_INLINE_DATA_0; + 
TheStructure.Common.DataSize = DATA_SIZE_DWORD; + TheStructure.Common.PostSyncOperation = + POST_SYNC_OPERATION_NO_POST_SYNC_OPERATION; + TheStructure.Common.MemoryType = MEMORY_TYPE_PER_PROCESS_GRAPHICS_ADDRESS; + TheStructure.Common.MiCommandOpcode = MI_COMMAND_OPCODE_MI_ATOMIC; + TheStructure.Common.CommandType = COMMAND_TYPE_MI_COMMAND; + } + static tagMI_ATOMIC sInit(void) { + MI_ATOMIC state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 11); + return TheStructure.RawData[index]; + } + inline void setDwordLength(const DWORD_LENGTH value) { + TheStructure.Common.DwordLength = value; + } + inline DWORD_LENGTH getDwordLength(void) const { + return static_cast(TheStructure.Common.DwordLength); + } + inline void setAtomicOpcode(const uint32_t value) { + DEBUG_BREAK_IF(value > 0xff00); + TheStructure.Common.AtomicOpcode = value; + } + inline uint32_t getAtomicOpcode(void) const { + return TheStructure.Common.AtomicOpcode; + } + inline void setReturnDataControl(const uint32_t value) { + TheStructure.Common.ReturnDataControl = value; + } + inline uint32_t getReturnDataControl(void) const { + return TheStructure.Common.ReturnDataControl; + } + inline void setCsStall(const uint32_t value) { + TheStructure.Common.CsStall = value; + } + inline uint32_t getCsStall(void) const { return TheStructure.Common.CsStall; } + inline void setInlineData(const uint32_t value) { + TheStructure.Common.InlineData = value; + } + inline uint32_t getInlineData(void) const { + return TheStructure.Common.InlineData; + } + inline void setDataSize(const DATA_SIZE value) { + TheStructure.Common.DataSize = value; + } + inline DATA_SIZE getDataSize(void) const { + return static_cast(TheStructure.Common.DataSize); + } + inline void setPostSyncOperation(const POST_SYNC_OPERATION value) { + TheStructure.Common.PostSyncOperation = value; + } + inline POST_SYNC_OPERATION getPostSyncOperation(void) const { + return static_cast( + 
TheStructure.Common.PostSyncOperation); + } + inline void setMemoryType(const MEMORY_TYPE value) { + TheStructure.Common.MemoryType = value; + } + inline MEMORY_TYPE getMemoryType(void) const { + return static_cast(TheStructure.Common.MemoryType); + } + inline void setWorkloadPartitionIdOffsetEnable(const bool value) { + TheStructure.Common.WorkloadPartitionIdOffsetEnable = value; + } + inline bool getWorkloadPartitionIdOffsetEnable(void) const { + return TheStructure.Common.WorkloadPartitionIdOffsetEnable; + } + typedef enum tagMEMORYADDRESS { + MEMORYADDRESS_BIT_SHIFT = 0x2, + MEMORYADDRESS_ALIGN_SIZE = 0x4, + } MEMORYADDRESS; + inline void setMemoryAddress(const uint32_t value) { + DEBUG_BREAK_IF(value > 0xfffffffcL); + TheStructure.Common.MemoryAddress = value >> MEMORYADDRESS_BIT_SHIFT; + } + inline uint32_t getMemoryAddress(void) const { + return TheStructure.Common.MemoryAddress << MEMORYADDRESS_BIT_SHIFT; + } + inline void setMemoryAddressHigh(const uint32_t value) { + TheStructure.Common.MemoryAddressHigh = value; + } + inline uint32_t getMemoryAddressHigh(void) const { + return TheStructure.Common.MemoryAddressHigh; + } + inline void setOperand1DataDword0(const uint32_t value) { + TheStructure.Common.Operand1DataDword0 = value; + } + inline uint32_t getOperand1DataDword0(void) const { + return TheStructure.Common.Operand1DataDword0; + } + inline void setOperand2DataDword0(const uint32_t value) { + TheStructure.Common.Operand2DataDword0 = value; + } + inline uint32_t getOperand2DataDword0(void) const { + return TheStructure.Common.Operand2DataDword0; + } + inline void setOperand1DataDword1(const uint32_t value) { + TheStructure.Common.Operand1DataDword1 = value; + } + inline uint32_t getOperand1DataDword1(void) const { + return TheStructure.Common.Operand1DataDword1; + } + inline void setOperand2DataDword1(const uint32_t value) { + TheStructure.Common.Operand2DataDword1 = value; + } + inline uint32_t getOperand2DataDword1(void) const { + return 
TheStructure.Common.Operand2DataDword1; + } + inline void setOperand1DataDword2(const uint32_t value) { + TheStructure.Common.Operand1DataDword2 = value; + } + inline uint32_t getOperand1DataDword2(void) const { + return TheStructure.Common.Operand1DataDword2; + } + inline void setOperand2DataDword2(const uint32_t value) { + TheStructure.Common.Operand2DataDword2 = value; + } + inline uint32_t getOperand2DataDword2(void) const { + return TheStructure.Common.Operand2DataDword2; + } + inline void setOperand1DataDword3(const uint32_t value) { + TheStructure.Common.Operand1DataDword3 = value; + } + inline uint32_t getOperand1DataDword3(void) const { + return TheStructure.Common.Operand1DataDword3; + } + inline void setOperand2DataDword3(const uint32_t value) { + TheStructure.Common.Operand2DataDword3 = value; + } + inline uint32_t getOperand2DataDword3(void) const { + return TheStructure.Common.Operand2DataDword3; + } +} MI_ATOMIC; +STATIC_ASSERT(44 == sizeof(MI_ATOMIC)); + +typedef struct tagMI_BATCH_BUFFER_END { + union tagTheStructure { + struct tagCommon { + uint32_t EndContext : BITFIELD_RANGE(0, 0); + uint32_t Reserved_1 : BITFIELD_RANGE(1, 22); + uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + } Common; + uint32_t RawData[1]; + } TheStructure; + typedef enum tagMI_COMMAND_OPCODE { + MI_COMMAND_OPCODE_MI_BATCH_BUFFER_END = 0xa, + } MI_COMMAND_OPCODE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_MI_COMMAND = 0x0, + } COMMAND_TYPE; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.MiCommandOpcode = MI_COMMAND_OPCODE_MI_BATCH_BUFFER_END; + TheStructure.Common.CommandType = COMMAND_TYPE_MI_COMMAND; + } + static tagMI_BATCH_BUFFER_END sInit(void) { + MI_BATCH_BUFFER_END state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 1); + return TheStructure.RawData[index]; + } + inline void 
setEndContext(const bool value) { + TheStructure.Common.EndContext = value; + } + inline bool getEndContext(void) const { + return (TheStructure.Common.EndContext); + } +} MI_BATCH_BUFFER_END; +STATIC_ASSERT(4 == sizeof(MI_BATCH_BUFFER_END)); + +typedef struct tagMI_LOAD_REGISTER_IMM { + union tagTheStructure { + struct tagCommon { + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t ByteWriteDisables : BITFIELD_RANGE(8, 11); + uint32_t Reserved_12 : BITFIELD_RANGE(12, 16); + uint32_t MmioRemapEnable : BITFIELD_RANGE(17, 17); + uint32_t Reserved_13 : BITFIELD_RANGE(18, 18); + uint32_t AddCsMmioStartOffset : BITFIELD_RANGE(19, 19); + uint32_t Reserved_20 : BITFIELD_RANGE(20, 22); + uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + uint32_t Reserved_32 : BITFIELD_RANGE(0, 1); + uint32_t RegisterOffset : BITFIELD_RANGE(2, 22); + uint32_t Reserved_55 : BITFIELD_RANGE(23, 31); + uint32_t DataDword; + } Common; + uint32_t RawData[3]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_EXCLUDES_DWORD_0_1 = 0x1, + } DWORD_LENGTH; + typedef enum tagMI_COMMAND_OPCODE { + MI_COMMAND_OPCODE_MI_LOAD_REGISTER_IMM = 0x22, + } MI_COMMAND_OPCODE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_MI_COMMAND = 0x0, + } COMMAND_TYPE; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_EXCLUDES_DWORD_0_1; + TheStructure.Common.MiCommandOpcode = MI_COMMAND_OPCODE_MI_LOAD_REGISTER_IMM; + TheStructure.Common.CommandType = COMMAND_TYPE_MI_COMMAND; + } + static tagMI_LOAD_REGISTER_IMM sInit(void) { + MI_LOAD_REGISTER_IMM state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 3); + return TheStructure.RawData[index]; + } + inline void setByteWriteDisables(const uint32_t value) { + TheStructure.Common.ByteWriteDisables = value; + } + inline uint32_t getByteWriteDisables(void) const { + 
return (TheStructure.Common.ByteWriteDisables); + } + inline void setAddCsMmioStartOffset(const uint32_t value) { + TheStructure.Common.AddCsMmioStartOffset = value; + } + inline uint32_t getAddCsMmioStartOffset(void) const { + return (TheStructure.Common.AddCsMmioStartOffset); + } + typedef enum tagREGISTEROFFSET { + REGISTEROFFSET_BIT_SHIFT = 0x2, + REGISTEROFFSET_ALIGN_SIZE = 0x4, + } REGISTEROFFSET; + inline void setRegisterOffset(const uint32_t value) { + TheStructure.Common.RegisterOffset = value >> REGISTEROFFSET_BIT_SHIFT; + } + inline uint32_t getRegisterOffset(void) const { + return (TheStructure.Common.RegisterOffset << REGISTEROFFSET_BIT_SHIFT); + } + inline void setDataDword(const uint32_t value) { + TheStructure.Common.DataDword = value; + } + inline uint32_t getDataDword(void) const { + return (TheStructure.Common.DataDword); + } + inline void setMmioRemapEnable(const bool value) { + TheStructure.Common.MmioRemapEnable = value; + } + inline bool getMmioRemapEnable(void) const { + return TheStructure.Common.MmioRemapEnable; + } +} MI_LOAD_REGISTER_IMM; +STATIC_ASSERT(12 == sizeof(MI_LOAD_REGISTER_IMM)); + +typedef struct tagMI_NOOP { + union tagTheStructure { + struct tagCommon { + uint32_t IdentificationNumber : BITFIELD_RANGE(0, 21); + uint32_t IdentificationNumberRegisterWriteEnable : BITFIELD_RANGE(22, 22); + uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + } Common; + uint32_t RawData[1]; + } TheStructure; + typedef enum tagMI_COMMAND_OPCODE { + MI_COMMAND_OPCODE_MI_NOOP = 0x0, + } MI_COMMAND_OPCODE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_MI_COMMAND = 0x0, + } COMMAND_TYPE; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.MiCommandOpcode = MI_COMMAND_OPCODE_MI_NOOP; + TheStructure.Common.CommandType = COMMAND_TYPE_MI_COMMAND; + } + static tagMI_NOOP sInit(void) { + MI_NOOP state; + state.init(); + return state; + } + inline uint32_t 
&getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 1); + return TheStructure.RawData[index]; + } + inline void setIdentificationNumber(const uint32_t value) { + TheStructure.Common.IdentificationNumber = value; + } + inline uint32_t getIdentificationNumber(void) const { + return (TheStructure.Common.IdentificationNumber); + } + inline void setIdentificationNumberRegisterWriteEnable(const bool value) { + TheStructure.Common.IdentificationNumberRegisterWriteEnable = value; + } + inline bool getIdentificationNumberRegisterWriteEnable(void) const { + return (TheStructure.Common.IdentificationNumberRegisterWriteEnable); + } +} MI_NOOP; +STATIC_ASSERT(4 == sizeof(MI_NOOP)); + +typedef struct tagRENDER_SURFACE_STATE { + union tagTheStructure { + struct tagCommon { + // DWORD 0 + uint32_t CubeFaceEnablePositiveZ : BITFIELD_RANGE(0, 0); + uint32_t CubeFaceEnableNegativeZ : BITFIELD_RANGE(1, 1); + uint32_t CubeFaceEnablePositiveY : BITFIELD_RANGE(2, 2); + uint32_t CubeFaceEnableNegativeY : BITFIELD_RANGE(3, 3); + uint32_t CubeFaceEnablePositiveX : BITFIELD_RANGE(4, 4); + uint32_t CubeFaceEnableNegativeX : BITFIELD_RANGE(5, 5); + uint32_t MediaBoundaryPixelMode : BITFIELD_RANGE(6, 7); + uint32_t RenderCacheReadWriteMode : BITFIELD_RANGE(8, 8); + uint32_t SamplerL2OutOfOrderModeDisable : BITFIELD_RANGE(9, 9); + uint32_t VerticalLineStrideOffset : BITFIELD_RANGE(10, 10); + uint32_t VerticalLineStride : BITFIELD_RANGE(11, 11); + uint32_t TileMode : BITFIELD_RANGE(12, 13); + uint32_t SurfaceHorizontalAlignment : BITFIELD_RANGE(14, 15); + uint32_t SurfaceVerticalAlignment : BITFIELD_RANGE(16, 17); + uint32_t SurfaceFormat : BITFIELD_RANGE(18, 26); + uint32_t AstcEnable : BITFIELD_RANGE(27, 27); + uint32_t SurfaceArray : BITFIELD_RANGE(28, 28); + uint32_t SurfaceType : BITFIELD_RANGE(29, 31); + // DWORD 1 + uint32_t SurfaceQpitch : BITFIELD_RANGE(0, 14); + uint32_t SampleTapDiscardDisable : BITFIELD_RANGE(15, 15); + uint32_t Reserved_48 : BITFIELD_RANGE(16, 16); + 
uint32_t DoubleFetchDisable : BITFIELD_RANGE(17, 17); + uint32_t CornerTexelMode : BITFIELD_RANGE(18, 18); + uint32_t BaseMipLevel : BITFIELD_RANGE(19, 23); + uint32_t MemoryObjectControlStateEncryptedData : BITFIELD_RANGE(24, 24); + uint32_t MemoryObjectControlStateIndexToMocsTables : BITFIELD_RANGE(25, 30); + uint32_t EnableUnormPathInColorPipe : BITFIELD_RANGE(31, 31); + // DWORD 2 + uint32_t Width : BITFIELD_RANGE(0, 13); + uint32_t Reserved_78 : BITFIELD_RANGE(14, 15); + uint32_t Height : BITFIELD_RANGE(16, 29); + uint32_t Reserved_94 : BITFIELD_RANGE(30, 30); + uint32_t DepthStencilResource : BITFIELD_RANGE(31, 31); + // DWORD 3 + uint32_t SurfacePitch : BITFIELD_RANGE(0, 17); + uint32_t NullProbingEnable : BITFIELD_RANGE(18, 18); + uint32_t Reserved_115 : BITFIELD_RANGE(19, 19); + uint32_t Reserved_116 : BITFIELD_RANGE(20, 20); + uint32_t Depth : BITFIELD_RANGE(21, 31); + // DWORD 4 + uint32_t MultisamplePositionPaletteIndex : BITFIELD_RANGE(0, 2); + uint32_t NumberOfMultisamples : BITFIELD_RANGE(3, 5); + uint32_t MultisampledSurfaceStorageFormat : BITFIELD_RANGE(6, 6); + uint32_t RenderTargetViewExtent : BITFIELD_RANGE(7, 17); + uint32_t MinimumArrayElement : BITFIELD_RANGE(18, 28); + uint32_t RenderTargetAndSampleUnormRotation : BITFIELD_RANGE(29, 30); + uint32_t Reserved_159 : BITFIELD_RANGE(31, 31); + // DWORD 5 + uint32_t MipCountLod : BITFIELD_RANGE(0, 3); + uint32_t SurfaceMinLod : BITFIELD_RANGE(4, 7); + uint32_t MipTailStartLod : BITFIELD_RANGE(8, 11); + uint32_t Reserved_172 : BITFIELD_RANGE(12, 13); + uint32_t CoherencyType : BITFIELD_RANGE(14, 14); + uint32_t Reserved_175 : BITFIELD_RANGE(15, 15); + uint32_t L1CachePolicyL1CacheControl : BITFIELD_RANGE(16, 18); + uint32_t Reserved_178 : BITFIELD_RANGE(19, 19); + uint32_t EwaDisableForCube : BITFIELD_RANGE(20, 20); + uint32_t YOffset : BITFIELD_RANGE(21, 23); + uint32_t Reserved_184 : BITFIELD_RANGE(24, 24); + uint32_t XOffset : BITFIELD_RANGE(25, 31); + // DWORD 6 + uint32_t Reserved_192 : 
BITFIELD_RANGE(0, 2); + uint32_t Reserved_195 : BITFIELD_RANGE(3, 12); + uint32_t Reserved_205 : BITFIELD_RANGE(13, 14); + uint32_t Reserved_207 : BITFIELD_RANGE(15, 15); + uint32_t Reserved_208 : BITFIELD_RANGE(16, 29); + uint32_t Reserved_222 : BITFIELD_RANGE(30, 30); + uint32_t Reserved_223 : BITFIELD_RANGE(31, 31); + // DWORD 7 + uint32_t ResourceMinLod : BITFIELD_RANGE(0, 11); + uint32_t Reserved_236 : BITFIELD_RANGE(12, 13); + uint32_t DisableSupportForMultiGpuAtomics : BITFIELD_RANGE(14, 14); + uint32_t DisableSupportForMultiGpuPartialWrites : BITFIELD_RANGE(15, 15); + uint32_t ShaderChannelSelectAlpha : BITFIELD_RANGE(16, 18); + uint32_t ShaderChannelSelectBlue : BITFIELD_RANGE(19, 21); + uint32_t ShaderChannelSelectGreen : BITFIELD_RANGE(22, 24); + uint32_t ShaderChannelSelectRed : BITFIELD_RANGE(25, 27); + uint32_t Reserved_252 : BITFIELD_RANGE(28, 28); + uint32_t Reserved_253 : BITFIELD_RANGE(29, 29); + uint32_t MemoryCompressionEnable : BITFIELD_RANGE(30, 30); + uint32_t MemoryCompressionMode : BITFIELD_RANGE(31, 31); + // DWORD 8, 9 + uint64_t SurfaceBaseAddress; + // DWORD 10, 11 + uint64_t QuiltWidth : BITFIELD_RANGE(0, 4); + uint64_t QuiltHeight : BITFIELD_RANGE(5, 9); + uint64_t ClearValueAddressEnable : BITFIELD_RANGE(10, 10); + uint64_t ProceduralTexture : BITFIELD_RANGE(11, 11); + uint64_t Reserved_332 : BITFIELD_RANGE(12, 63); + // DWORD 12 + uint32_t CompressionFormat : BITFIELD_RANGE(0, 4); + uint32_t Reserved_389 : BITFIELD_RANGE(5, 5); + uint32_t ClearColorAddress : BITFIELD_RANGE(6, 31); + // DWORD 13 + uint32_t ClearColorAddressHigh : BITFIELD_RANGE(0, 15); + uint32_t Reserved_432 : BITFIELD_RANGE(16, 30); + uint32_t DisallowLowQualityFlitering : BITFIELD_RANGE(31, 31); + // DWORD 14 + uint32_t Reserved_448; + // DWORD 15 + uint32_t Reserved_480; + } Common; + struct tag_SurfaceFormatIsnotPlanar { + // DWORD 0 + uint32_t Reserved_0 : BITFIELD_RANGE(0, 0); + uint32_t Reserved_1 : BITFIELD_RANGE(1, 1); + uint32_t Reserved_2 : 
BITFIELD_RANGE(2, 2); + uint32_t Reserved_3 : BITFIELD_RANGE(3, 3); + uint32_t Reserved_4 : BITFIELD_RANGE(4, 4); + uint32_t Reserved_5 : BITFIELD_RANGE(5, 5); + uint32_t Reserved_6 : BITFIELD_RANGE(6, 7); + uint32_t Reserved_8 : BITFIELD_RANGE(8, 8); + uint32_t Reserved_9 : BITFIELD_RANGE(9, 9); + uint32_t Reserved_10 : BITFIELD_RANGE(10, 10); + uint32_t Reserved_11 : BITFIELD_RANGE(11, 11); + uint32_t Reserved_12 : BITFIELD_RANGE(12, 13); + uint32_t Reserved_14 : BITFIELD_RANGE(14, 15); + uint32_t Reserved_16 : BITFIELD_RANGE(16, 17); + uint32_t Reserved_18 : BITFIELD_RANGE(18, 26); + uint32_t Reserved_27 : BITFIELD_RANGE(27, 27); + uint32_t Reserved_28 : BITFIELD_RANGE(28, 28); + uint32_t Reserved_29 : BITFIELD_RANGE(29, 31); + // DWORD 1 + uint32_t Reserved_32 : BITFIELD_RANGE(0, 14); + uint32_t Reserved_47 : BITFIELD_RANGE(15, 15); + uint32_t Reserved_48 : BITFIELD_RANGE(16, 16); + uint32_t Reserved_49 : BITFIELD_RANGE(17, 17); + uint32_t Reserved_50 : BITFIELD_RANGE(18, 18); + uint32_t Reserved_51 : BITFIELD_RANGE(19, 23); + uint32_t Reserved_56 : BITFIELD_RANGE(24, 24); + uint32_t Reserved_57 : BITFIELD_RANGE(25, 30); + uint32_t Reserved_63 : BITFIELD_RANGE(31, 31); + // DWORD 2 + uint32_t Reserved_64 : BITFIELD_RANGE(0, 13); + uint32_t Reserved_78 : BITFIELD_RANGE(14, 15); + uint32_t Reserved_80 : BITFIELD_RANGE(16, 29); + uint32_t Reserved_94 : BITFIELD_RANGE(30, 30); + uint32_t Reserved_95 : BITFIELD_RANGE(31, 31); + // DWORD 3 + uint32_t Reserved_96 : BITFIELD_RANGE(0, 17); + uint32_t Reserved_114 : BITFIELD_RANGE(18, 18); + uint32_t Reserved_115 : BITFIELD_RANGE(19, 19); + uint32_t Reserved_116 : BITFIELD_RANGE(20, 20); + uint32_t Reserved_117 : BITFIELD_RANGE(21, 31); + // DWORD 4 + uint32_t Reserved_128 : BITFIELD_RANGE(0, 2); + uint32_t Reserved_131 : BITFIELD_RANGE(3, 5); + uint32_t Reserved_134 : BITFIELD_RANGE(6, 6); + uint32_t Reserved_135 : BITFIELD_RANGE(7, 17); + uint32_t Reserved_146 : BITFIELD_RANGE(18, 28); + uint32_t Reserved_157 : 
BITFIELD_RANGE(29, 30); + uint32_t Reserved_159 : BITFIELD_RANGE(31, 31); + // DWORD 5 + uint32_t Reserved_160 : BITFIELD_RANGE(0, 3); + uint32_t Reserved_164 : BITFIELD_RANGE(4, 7); + uint32_t Reserved_168 : BITFIELD_RANGE(8, 11); + uint32_t Reserved_172 : BITFIELD_RANGE(12, 13); + uint32_t Reserved_174 : BITFIELD_RANGE(14, 14); + uint32_t Reserved_175 : BITFIELD_RANGE(15, 15); + uint32_t Reserved_176 : BITFIELD_RANGE(16, 17); + uint32_t Reserved_178 : BITFIELD_RANGE(18, 19); + uint32_t Reserved_180 : BITFIELD_RANGE(20, 20); + uint32_t Reserved_181 : BITFIELD_RANGE(21, 23); + uint32_t Reserved_184 : BITFIELD_RANGE(24, 24); + uint32_t Reserved_185 : BITFIELD_RANGE(25, 31); + // DWORD 6 + uint32_t AuxiliarySurfaceMode : BITFIELD_RANGE(0, 2); + uint32_t AuxiliarySurfacePitch : BITFIELD_RANGE(3, 12); + uint32_t Reserved_205 : BITFIELD_RANGE(13, 14); + uint32_t Reserved_207 : BITFIELD_RANGE(15, 15); + uint32_t AuxiliarySurfaceQpitch : BITFIELD_RANGE(16, 30); + uint32_t Reserved_223 : BITFIELD_RANGE(31, 31); + // DWORD 7 + uint32_t Reserved_224 : BITFIELD_RANGE(0, 11); + uint32_t Reserved_236 : BITFIELD_RANGE(12, 13); + uint32_t Reserved_238 : BITFIELD_RANGE(14, 15); + uint32_t Reserved_240 : BITFIELD_RANGE(16, 18); + uint32_t Reserved_243 : BITFIELD_RANGE(19, 21); + uint32_t Reserved_246 : BITFIELD_RANGE(22, 24); + uint32_t Reserved_249 : BITFIELD_RANGE(25, 27); + uint32_t Reserved_252 : BITFIELD_RANGE(28, 28); + uint32_t Reserved_253 : BITFIELD_RANGE(29, 29); + uint32_t Reserved_254 : BITFIELD_RANGE(30, 30); + uint32_t Reserved_255 : BITFIELD_RANGE(31, 31); + // DWORD 8, 9 + uint64_t Reserved_256; + // DWORD 10, 11 + uint64_t Reserved_320 : BITFIELD_RANGE(0, 4); + uint64_t Reserved_325 : BITFIELD_RANGE(5, 9); + uint64_t Reserved_330 : BITFIELD_RANGE(10, 10); + uint64_t Reserved_331 : BITFIELD_RANGE(11, 11); + uint64_t Reserved_332 : BITFIELD_RANGE(12, 63); + // DWORD 12 + uint32_t Reserved_384 : BITFIELD_RANGE(0, 4); + uint32_t Reserved_389 : BITFIELD_RANGE(5, 5); + 
uint32_t Reserved_390 : BITFIELD_RANGE(6, 31); + // DWORD 13 + uint32_t Reserved_416 : BITFIELD_RANGE(0, 15); + uint32_t Reserved_432 : BITFIELD_RANGE(16, 31); + // DWORD 14 + uint32_t Reserved_448; + // DWORD 15 + uint32_t Reserved_480; + } _SurfaceFormatIsnotPlanar; + struct tag_SurfaceFormatIsPlanar { + // DWORD 0 + uint32_t Reserved_0 : BITFIELD_RANGE(0, 0); + uint32_t Reserved_1 : BITFIELD_RANGE(1, 1); + uint32_t Reserved_2 : BITFIELD_RANGE(2, 2); + uint32_t Reserved_3 : BITFIELD_RANGE(3, 3); + uint32_t Reserved_4 : BITFIELD_RANGE(4, 4); + uint32_t Reserved_5 : BITFIELD_RANGE(5, 5); + uint32_t Reserved_6 : BITFIELD_RANGE(6, 7); + uint32_t Reserved_8 : BITFIELD_RANGE(8, 8); + uint32_t Reserved_9 : BITFIELD_RANGE(9, 9); + uint32_t Reserved_10 : BITFIELD_RANGE(10, 10); + uint32_t Reserved_11 : BITFIELD_RANGE(11, 11); + uint32_t Reserved_12 : BITFIELD_RANGE(12, 13); + uint32_t Reserved_14 : BITFIELD_RANGE(14, 15); + uint32_t Reserved_16 : BITFIELD_RANGE(16, 17); + uint32_t Reserved_18 : BITFIELD_RANGE(18, 26); + uint32_t Reserved_27 : BITFIELD_RANGE(27, 27); + uint32_t Reserved_28 : BITFIELD_RANGE(28, 28); + uint32_t Reserved_29 : BITFIELD_RANGE(29, 31); + // DWORD 1 + uint32_t Reserved_32 : BITFIELD_RANGE(0, 14); + uint32_t Reserved_47 : BITFIELD_RANGE(15, 15); + uint32_t Reserved_48 : BITFIELD_RANGE(16, 16); + uint32_t Reserved_49 : BITFIELD_RANGE(17, 17); + uint32_t Reserved_50 : BITFIELD_RANGE(18, 18); + uint32_t Reserved_51 : BITFIELD_RANGE(19, 23); + uint32_t Reserved_56 : BITFIELD_RANGE(24, 24); + uint32_t Reserved_57 : BITFIELD_RANGE(25, 30); + uint32_t Reserved_63 : BITFIELD_RANGE(31, 31); + // DWORD 2 + uint32_t Reserved_64 : BITFIELD_RANGE(0, 13); + uint32_t Reserved_78 : BITFIELD_RANGE(14, 15); + uint32_t Reserved_80 : BITFIELD_RANGE(16, 29); + uint32_t Reserved_94 : BITFIELD_RANGE(30, 30); + uint32_t Reserved_95 : BITFIELD_RANGE(31, 31); + // DWORD 3 + uint32_t Reserved_96 : BITFIELD_RANGE(0, 17); + uint32_t Reserved_114 : BITFIELD_RANGE(18, 18); + 
uint32_t Reserved_115 : BITFIELD_RANGE(19, 19); + uint32_t Reserved_116 : BITFIELD_RANGE(20, 20); + uint32_t Reserved_117 : BITFIELD_RANGE(21, 31); + // DWORD 4 + uint32_t Reserved_128 : BITFIELD_RANGE(0, 2); + uint32_t Reserved_131 : BITFIELD_RANGE(3, 5); + uint32_t Reserved_134 : BITFIELD_RANGE(6, 6); + uint32_t Reserved_135 : BITFIELD_RANGE(7, 17); + uint32_t Reserved_146 : BITFIELD_RANGE(18, 28); + uint32_t Reserved_157 : BITFIELD_RANGE(29, 30); + uint32_t Reserved_159 : BITFIELD_RANGE(31, 31); + // DWORD 5 + uint32_t Reserved_160 : BITFIELD_RANGE(0, 3); + uint32_t Reserved_164 : BITFIELD_RANGE(4, 7); + uint32_t Reserved_168 : BITFIELD_RANGE(8, 11); + uint32_t Reserved_172 : BITFIELD_RANGE(12, 13); + uint32_t Reserved_174 : BITFIELD_RANGE(14, 14); + uint32_t Reserved_175 : BITFIELD_RANGE(15, 15); + uint32_t Reserved_176 : BITFIELD_RANGE(16, 17); + uint32_t Reserved_178 : BITFIELD_RANGE(18, 19); + uint32_t Reserved_180 : BITFIELD_RANGE(20, 20); + uint32_t Reserved_181 : BITFIELD_RANGE(21, 23); + uint32_t Reserved_184 : BITFIELD_RANGE(24, 24); + uint32_t Reserved_185 : BITFIELD_RANGE(25, 31); + // DWORD 6 + uint32_t YOffsetForUOrUvPlane : BITFIELD_RANGE(0, 13); + uint32_t Reserved_206 : BITFIELD_RANGE(14, 14); + uint32_t Reserved_207 : BITFIELD_RANGE(15, 15); + uint32_t XOffsetForUOrUvPlane : BITFIELD_RANGE(16, 29); + uint32_t HalfPitchForChroma : BITFIELD_RANGE(30, 30); + uint32_t SeparateUvPlaneEnable : BITFIELD_RANGE(31, 31); + // DWORD 7 + uint32_t Reserved_224 : BITFIELD_RANGE(0, 11); + uint32_t Reserved_236 : BITFIELD_RANGE(12, 13); + uint32_t Reserved_238 : BITFIELD_RANGE(14, 15); + uint32_t Reserved_240 : BITFIELD_RANGE(16, 18); + uint32_t Reserved_243 : BITFIELD_RANGE(19, 21); + uint32_t Reserved_246 : BITFIELD_RANGE(22, 24); + uint32_t Reserved_249 : BITFIELD_RANGE(25, 27); + uint32_t Reserved_252 : BITFIELD_RANGE(28, 28); + uint32_t Reserved_253 : BITFIELD_RANGE(29, 29); + uint32_t Reserved_254 : BITFIELD_RANGE(30, 30); + uint32_t Reserved_255 : 
BITFIELD_RANGE(31, 31); + // DWORD 8, 9 + uint64_t Reserved_256; + // DWORD 10, 11 + uint64_t Reserved_320 : BITFIELD_RANGE(0, 4); + uint64_t Reserved_325 : BITFIELD_RANGE(5, 9); + uint64_t Reserved_330 : BITFIELD_RANGE(10, 10); + uint64_t Reserved_331 : BITFIELD_RANGE(11, 11); + uint64_t Reserved_332 : BITFIELD_RANGE(12, 31); + uint64_t YOffsetForVPlane : BITFIELD_RANGE(32, 45); + uint64_t Reserved_366 : BITFIELD_RANGE(46, 47); + uint64_t XOffsetForVPlane : BITFIELD_RANGE(48, 61); + uint64_t Reserved_382 : BITFIELD_RANGE(62, 63); + // DWORD 12 + uint32_t Reserved_384 : BITFIELD_RANGE(0, 4); + uint32_t Reserved_389 : BITFIELD_RANGE(5, 5); + uint32_t Reserved_390 : BITFIELD_RANGE(6, 31); + // DWORD 13 + uint32_t Reserved_416 : BITFIELD_RANGE(0, 15); + uint32_t Reserved_432 : BITFIELD_RANGE(16, 31); + // DWORD 14 + uint32_t Reserved_448; + // DWORD 15 + uint32_t Reserved_480; + } _SurfaceFormatIsPlanar; + struct tag_SurfaceFormatIsnotPlanarAndMemoryCompressionEnableIs0 { + // DWORD 0 + uint32_t Reserved_0 : BITFIELD_RANGE(0, 0); + uint32_t Reserved_1 : BITFIELD_RANGE(1, 1); + uint32_t Reserved_2 : BITFIELD_RANGE(2, 2); + uint32_t Reserved_3 : BITFIELD_RANGE(3, 3); + uint32_t Reserved_4 : BITFIELD_RANGE(4, 4); + uint32_t Reserved_5 : BITFIELD_RANGE(5, 5); + uint32_t Reserved_6 : BITFIELD_RANGE(6, 7); + uint32_t Reserved_8 : BITFIELD_RANGE(8, 8); + uint32_t Reserved_9 : BITFIELD_RANGE(9, 9); + uint32_t Reserved_10 : BITFIELD_RANGE(10, 10); + uint32_t Reserved_11 : BITFIELD_RANGE(11, 11); + uint32_t Reserved_12 : BITFIELD_RANGE(12, 13); + uint32_t Reserved_14 : BITFIELD_RANGE(14, 15); + uint32_t Reserved_16 : BITFIELD_RANGE(16, 17); + uint32_t Reserved_18 : BITFIELD_RANGE(18, 26); + uint32_t Reserved_27 : BITFIELD_RANGE(27, 27); + uint32_t Reserved_28 : BITFIELD_RANGE(28, 28); + uint32_t Reserved_29 : BITFIELD_RANGE(29, 31); + // DWORD 1 + uint32_t Reserved_32 : BITFIELD_RANGE(0, 14); + uint32_t Reserved_47 : BITFIELD_RANGE(15, 15); + uint32_t Reserved_48 : 
BITFIELD_RANGE(16, 16); + uint32_t Reserved_49 : BITFIELD_RANGE(17, 17); + uint32_t Reserved_50 : BITFIELD_RANGE(18, 18); + uint32_t Reserved_51 : BITFIELD_RANGE(19, 23); + uint32_t Reserved_56 : BITFIELD_RANGE(24, 24); + uint32_t Reserved_57 : BITFIELD_RANGE(25, 30); + uint32_t Reserved_63 : BITFIELD_RANGE(31, 31); + // DWORD 2 + uint32_t Reserved_64 : BITFIELD_RANGE(0, 13); + uint32_t Reserved_78 : BITFIELD_RANGE(14, 15); + uint32_t Reserved_80 : BITFIELD_RANGE(16, 29); + uint32_t Reserved_94 : BITFIELD_RANGE(30, 30); + uint32_t Reserved_95 : BITFIELD_RANGE(31, 31); + // DWORD 3 + uint32_t Reserved_96 : BITFIELD_RANGE(0, 17); + uint32_t Reserved_114 : BITFIELD_RANGE(18, 18); + uint32_t Reserved_115 : BITFIELD_RANGE(19, 19); + uint32_t Reserved_116 : BITFIELD_RANGE(20, 20); + uint32_t Reserved_117 : BITFIELD_RANGE(21, 31); + // DWORD 4 + uint32_t Reserved_128 : BITFIELD_RANGE(0, 2); + uint32_t Reserved_131 : BITFIELD_RANGE(3, 5); + uint32_t Reserved_134 : BITFIELD_RANGE(6, 6); + uint32_t Reserved_135 : BITFIELD_RANGE(7, 17); + uint32_t Reserved_146 : BITFIELD_RANGE(18, 28); + uint32_t Reserved_157 : BITFIELD_RANGE(29, 30); + uint32_t Reserved_159 : BITFIELD_RANGE(31, 31); + // DWORD 5 + uint32_t Reserved_160 : BITFIELD_RANGE(0, 3); + uint32_t Reserved_164 : BITFIELD_RANGE(4, 7); + uint32_t Reserved_168 : BITFIELD_RANGE(8, 11); + uint32_t Reserved_172 : BITFIELD_RANGE(12, 13); + uint32_t Reserved_174 : BITFIELD_RANGE(14, 14); + uint32_t Reserved_175 : BITFIELD_RANGE(15, 15); + uint32_t Reserved_176 : BITFIELD_RANGE(16, 17); + uint32_t Reserved_178 : BITFIELD_RANGE(18, 19); + uint32_t Reserved_180 : BITFIELD_RANGE(20, 20); + uint32_t Reserved_181 : BITFIELD_RANGE(21, 23); + uint32_t Reserved_184 : BITFIELD_RANGE(24, 24); + uint32_t Reserved_185 : BITFIELD_RANGE(25, 31); + // DWORD 6 + uint32_t Reserved_192 : BITFIELD_RANGE(0, 2); + uint32_t Reserved_195 : BITFIELD_RANGE(3, 12); + uint32_t Reserved_205 : BITFIELD_RANGE(13, 14); + uint32_t Reserved_207 : 
BITFIELD_RANGE(15, 15); + uint32_t Reserved_208 : BITFIELD_RANGE(16, 29); + uint32_t Reserved_222 : BITFIELD_RANGE(30, 30); + uint32_t Reserved_223 : BITFIELD_RANGE(31, 31); + // DWORD 7 + uint32_t Reserved_224 : BITFIELD_RANGE(0, 11); + uint32_t Reserved_236 : BITFIELD_RANGE(12, 13); + uint32_t Reserved_238 : BITFIELD_RANGE(14, 15); + uint32_t Reserved_240 : BITFIELD_RANGE(16, 18); + uint32_t Reserved_243 : BITFIELD_RANGE(19, 21); + uint32_t Reserved_246 : BITFIELD_RANGE(22, 24); + uint32_t Reserved_249 : BITFIELD_RANGE(25, 27); + uint32_t Reserved_252 : BITFIELD_RANGE(28, 28); + uint32_t Reserved_253 : BITFIELD_RANGE(29, 29); + uint32_t Reserved_254 : BITFIELD_RANGE(30, 30); + uint32_t Reserved_255 : BITFIELD_RANGE(31, 31); + // DWORD 8, 9 + uint64_t Reserved_256; + // DWORD 10, 11 + uint64_t Reserved_320 : BITFIELD_RANGE(0, 4); + uint64_t Reserved_325 : BITFIELD_RANGE(5, 9); + uint64_t Reserved_330 : BITFIELD_RANGE(10, 10); + uint64_t Reserved_331 : BITFIELD_RANGE(11, 11); + uint64_t AuxiliarySurfaceBaseAddress : BITFIELD_RANGE(12, 63); + // DWORD 12 + uint32_t Reserved_384 : BITFIELD_RANGE(0, 4); + uint32_t Reserved_389 : BITFIELD_RANGE(5, 5); + uint32_t Reserved_390 : BITFIELD_RANGE(6, 31); + // DWORD 13 + uint32_t Reserved_416 : BITFIELD_RANGE(0, 15); + uint32_t Reserved_432 : BITFIELD_RANGE(16, 31); + // DWORD 14 + uint32_t Reserved_448; + // DWORD 15 + uint32_t Reserved_480; + } _SurfaceFormatIsnotPlanarAndMemoryCompressionEnableIs0; + uint32_t RawData[16]; + } TheStructure; + typedef enum tagMEDIA_BOUNDARY_PIXEL_MODE { + MEDIA_BOUNDARY_PIXEL_MODE_NORMAL_MODE = 0x0, + MEDIA_BOUNDARY_PIXEL_MODE_PROGRESSIVE_FRAME = 0x2, + MEDIA_BOUNDARY_PIXEL_MODE_INTERLACED_FRAME = 0x3, + } MEDIA_BOUNDARY_PIXEL_MODE; + typedef enum tagRENDER_CACHE_READ_WRITE_MODE { + RENDER_CACHE_READ_WRITE_MODE_WRITE_ONLY_CACHE = 0x0, + RENDER_CACHE_READ_WRITE_MODE_READ_WRITE_CACHE = 0x1, + } RENDER_CACHE_READ_WRITE_MODE; + typedef enum tagTILE_MODE { + TILE_MODE_LINEAR = 0x0, + 
TILE_MODE_WMAJOR = 0x1, + TILE_MODE_XMAJOR = 0x2, + TILE_MODE_YMAJOR = 0x3, + } TILE_MODE; + typedef enum tagSURFACE_HORIZONTAL_ALIGNMENT { + SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4 = 0x1, + SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_8 = 0x2, + SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_16 = 0x3, + } SURFACE_HORIZONTAL_ALIGNMENT; + typedef enum tagSURFACE_VERTICAL_ALIGNMENT { + SURFACE_VERTICAL_ALIGNMENT_VALIGN_4 = 0x1, + SURFACE_VERTICAL_ALIGNMENT_VALIGN_8 = 0x2, + SURFACE_VERTICAL_ALIGNMENT_VALIGN_16 = 0x3, + } SURFACE_VERTICAL_ALIGNMENT; + typedef enum tagSURFACE_FORMAT { + SURFACE_FORMAT_R32G32B32A32_FLOAT = 0x0, + SURFACE_FORMAT_R32G32B32A32_SINT = 0x1, + SURFACE_FORMAT_R32G32B32A32_UINT = 0x2, + SURFACE_FORMAT_R32G32B32A32_UNORM = 0x3, + SURFACE_FORMAT_R32G32B32A32_SNORM = 0x4, + SURFACE_FORMAT_R64G64_FLOAT = 0x5, + SURFACE_FORMAT_R32G32B32X32_FLOAT = 0x6, + SURFACE_FORMAT_R32G32B32A32_SSCALED = 0x7, + SURFACE_FORMAT_R32G32B32A32_USCALED = 0x8, + SURFACE_FORMAT_PLANAR_422_8_P208 = 0xc, + SURFACE_FORMAT_PLANAR_420_8_SAMPLE_8X8 = 0xd, + SURFACE_FORMAT_PLANAR_411_8 = 0xe, + SURFACE_FORMAT_PLANAR_422_8 = 0xf, + SURFACE_FORMAT_R8G8B8A8_UNORM_VDI = 0x10, + SURFACE_FORMAT_YCRCB_NORMAL_SAMPLE_8X8 = 0x11, + SURFACE_FORMAT_YCRCB_SWAPUVY_SAMPLE_8X8 = 0x12, + SURFACE_FORMAT_YCRCB_SWAPUV_SAMPLE_8X8 = 0x13, + SURFACE_FORMAT_YCRCB_SWAPY_SAMPLE_8X8 = 0x14, + SURFACE_FORMAT_R32G32B32A32_FLOAT_LD = 0x15, + SURFACE_FORMAT_PLANAR_420_16_SAMPLE_8X8 = 0x16, + SURFACE_FORMAT_R16B16_UNORM_SAMPLE_8X8 = 0x17, + SURFACE_FORMAT_Y16_UNORM_SAMPLE_8X8 = 0x18, + SURFACE_FORMAT_PLANAR_Y32_UNORM = 0x19, + SURFACE_FORMAT_R32G32B32A32_SFIXED = 0x20, + SURFACE_FORMAT_R64G64_PASSTHRU = 0x21, + SURFACE_FORMAT_R32G32B32_FLOAT = 0x40, + SURFACE_FORMAT_R32G32B32_SINT = 0x41, + SURFACE_FORMAT_R32G32B32_UINT = 0x42, + SURFACE_FORMAT_R32G32B32_UNORM = 0x43, + SURFACE_FORMAT_R32G32B32_SNORM = 0x44, + SURFACE_FORMAT_R32G32B32_SSCALED = 0x45, + SURFACE_FORMAT_R32G32B32_USCALED = 0x46, + SURFACE_FORMAT_R32G32B32_FLOAT_LD = 
0x47, + SURFACE_FORMAT_R32G32B32_SFIXED = 0x50, + SURFACE_FORMAT_R16G16B16A16_UNORM = 0x80, + SURFACE_FORMAT_R16G16B16A16_SNORM = 0x81, + SURFACE_FORMAT_R16G16B16A16_SINT = 0x82, + SURFACE_FORMAT_R16G16B16A16_UINT = 0x83, + SURFACE_FORMAT_R16G16B16A16_FLOAT = 0x84, + SURFACE_FORMAT_R32G32_FLOAT = 0x85, + SURFACE_FORMAT_R32G32_SINT = 0x86, + SURFACE_FORMAT_R32G32_UINT = 0x87, + SURFACE_FORMAT_R32_FLOAT_X8X24_TYPELESS = 0x88, + SURFACE_FORMAT_X32_TYPELESS_G8X24_UINT = 0x89, + SURFACE_FORMAT_L32A32_FLOAT = 0x8a, + SURFACE_FORMAT_R32G32_UNORM = 0x8b, + SURFACE_FORMAT_R32G32_SNORM = 0x8c, + SURFACE_FORMAT_R64_FLOAT = 0x8d, + SURFACE_FORMAT_R16G16B16X16_UNORM = 0x8e, + SURFACE_FORMAT_R16G16B16X16_FLOAT = 0x8f, + SURFACE_FORMAT_A32X32_FLOAT = 0x90, + SURFACE_FORMAT_L32X32_FLOAT = 0x91, + SURFACE_FORMAT_I32X32_FLOAT = 0x92, + SURFACE_FORMAT_R16G16B16A16_SSCALED = 0x93, + SURFACE_FORMAT_R16G16B16A16_USCALED = 0x94, + SURFACE_FORMAT_R32G32_SSCALED = 0x95, + SURFACE_FORMAT_R32G32_USCALED = 0x96, + SURFACE_FORMAT_R32G32_FLOAT_LD = 0x97, + SURFACE_FORMAT_R32_FLOAT_X8X24_TYPELESS_LD = 0x98, + SURFACE_FORMAT_R32G32_SFIXED = 0xa0, + SURFACE_FORMAT_R64_PASSTHRU = 0xa1, + SURFACE_FORMAT_B8G8R8A8_UNORM = 0xc0, + SURFACE_FORMAT_B8G8R8A8_UNORM_SRGB = 0xc1, + SURFACE_FORMAT_R10G10B10A2_UNORM = 0xc2, + SURFACE_FORMAT_R10G10B10A2_UNORM_SRGB = 0xc3, + SURFACE_FORMAT_R10G10B10A2_UINT = 0xc4, + SURFACE_FORMAT_R10G10B10_SNORM_A2_UNORM = 0xc5, + SURFACE_FORMAT_R10G10B10A2_UNORM_SAMPLE_8X8 = 0xc6, + SURFACE_FORMAT_R8G8B8A8_UNORM = 0xc7, + SURFACE_FORMAT_R8G8B8A8_UNORM_SRGB = 0xc8, + SURFACE_FORMAT_R8G8B8A8_SNORM = 0xc9, + SURFACE_FORMAT_R8G8B8A8_SINT = 0xca, + SURFACE_FORMAT_R8G8B8A8_UINT = 0xcb, + SURFACE_FORMAT_R16G16_UNORM = 0xcc, + SURFACE_FORMAT_R16G16_SNORM = 0xcd, + SURFACE_FORMAT_R16G16_SINT = 0xce, + SURFACE_FORMAT_R16G16_UINT = 0xcf, + SURFACE_FORMAT_R16G16_FLOAT = 0xd0, + SURFACE_FORMAT_B10G10R10A2_UNORM = 0xd1, + SURFACE_FORMAT_B10G10R10A2_UNORM_SRGB = 0xd2, + 
SURFACE_FORMAT_R11G11B10_FLOAT = 0xd3, + SURFACE_FORMAT_R10G10B10_FLOAT_A2_UNORM = 0xd5, + SURFACE_FORMAT_R32_SINT = 0xd6, + SURFACE_FORMAT_R32_UINT = 0xd7, + SURFACE_FORMAT_R32_FLOAT = 0xd8, + SURFACE_FORMAT_R24_UNORM_X8_TYPELESS = 0xd9, + SURFACE_FORMAT_X24_TYPELESS_G8_UINT = 0xda, + SURFACE_FORMAT_R32_FLOAT_LD = 0xdb, + SURFACE_FORMAT_R24_UNORM_X8_TYPELESS_LD = 0xdc, + SURFACE_FORMAT_L32_UNORM = 0xdd, + SURFACE_FORMAT_A32_UNORM = 0xde, + SURFACE_FORMAT_L16A16_UNORM = 0xdf, + SURFACE_FORMAT_I24X8_UNORM = 0xe0, + SURFACE_FORMAT_L24X8_UNORM = 0xe1, + SURFACE_FORMAT_A24X8_UNORM = 0xe2, + SURFACE_FORMAT_I32_FLOAT = 0xe3, + SURFACE_FORMAT_L32_FLOAT = 0xe4, + SURFACE_FORMAT_A32_FLOAT = 0xe5, + SURFACE_FORMAT_X8B8_UNORM_G8R8_SNORM = 0xe6, + SURFACE_FORMAT_A8X8_UNORM_G8R8_SNORM = 0xe7, + SURFACE_FORMAT_B8X8_UNORM_G8R8_SNORM = 0xe8, + SURFACE_FORMAT_B8G8R8X8_UNORM = 0xe9, + SURFACE_FORMAT_B8G8R8X8_UNORM_SRGB = 0xea, + SURFACE_FORMAT_R8G8B8X8_UNORM = 0xeb, + SURFACE_FORMAT_R8G8B8X8_UNORM_SRGB = 0xec, + SURFACE_FORMAT_R9G9B9E5_SHAREDEXP = 0xed, + SURFACE_FORMAT_B10G10R10X2_UNORM = 0xee, + SURFACE_FORMAT_L16A16_FLOAT = 0xf0, + SURFACE_FORMAT_R32_UNORM = 0xf1, + SURFACE_FORMAT_R32_SNORM = 0xf2, + SURFACE_FORMAT_R10G10B10X2_USCALED = 0xf3, + SURFACE_FORMAT_R8G8B8A8_SSCALED = 0xf4, + SURFACE_FORMAT_R8G8B8A8_USCALED = 0xf5, + SURFACE_FORMAT_R16G16_SSCALED = 0xf6, + SURFACE_FORMAT_R16G16_USCALED = 0xf7, + SURFACE_FORMAT_R32_SSCALED = 0xf8, + SURFACE_FORMAT_R32_USCALED = 0xf9, + SURFACE_FORMAT_R8B8G8A8_UNORM = 0xfa, + SURFACE_FORMAT_R8G8B8A8_SINT_NOA = 0xfb, + SURFACE_FORMAT_R8G8B8A8_UINT_NOA = 0xfc, + SURFACE_FORMAT_R8G8B8A8_UNORM_YUV = 0xfd, + SURFACE_FORMAT_R8G8B8A8_UNORM_SNCK = 0xfe, + SURFACE_FORMAT_R8G8B8A8_UNORM_NOA = 0xff, + SURFACE_FORMAT_B5G6R5_UNORM = 0x100, + SURFACE_FORMAT_B5G6R5_UNORM_SRGB = 0x101, + SURFACE_FORMAT_B5G5R5A1_UNORM = 0x102, + SURFACE_FORMAT_B5G5R5A1_UNORM_SRGB = 0x103, + SURFACE_FORMAT_B4G4R4A4_UNORM = 0x104, + SURFACE_FORMAT_B4G4R4A4_UNORM_SRGB = 
0x105, + SURFACE_FORMAT_R8G8_UNORM = 0x106, + SURFACE_FORMAT_R8G8_SNORM = 0x107, + SURFACE_FORMAT_R8G8_SINT = 0x108, + SURFACE_FORMAT_R8G8_UINT = 0x109, + SURFACE_FORMAT_R16_UNORM = 0x10a, + SURFACE_FORMAT_R16_SNORM = 0x10b, + SURFACE_FORMAT_R16_SINT = 0x10c, + SURFACE_FORMAT_R16_UINT = 0x10d, + SURFACE_FORMAT_R16_FLOAT = 0x10e, + SURFACE_FORMAT_A8P8_UNORM_PALETTE0 = 0x10f, + SURFACE_FORMAT_A8P8_UNORM_PALETTE1 = 0x110, + SURFACE_FORMAT_I16_UNORM = 0x111, + SURFACE_FORMAT_L16_UNORM = 0x112, + SURFACE_FORMAT_A16_UNORM = 0x113, + SURFACE_FORMAT_L8A8_UNORM = 0x114, + SURFACE_FORMAT_I16_FLOAT = 0x115, + SURFACE_FORMAT_L16_FLOAT = 0x116, + SURFACE_FORMAT_A16_FLOAT = 0x117, + SURFACE_FORMAT_L8A8_UNORM_SRGB = 0x118, + SURFACE_FORMAT_R5G5_SNORM_B6_UNORM = 0x119, + SURFACE_FORMAT_B5G5R5X1_UNORM = 0x11a, + SURFACE_FORMAT_B5G5R5X1_UNORM_SRGB = 0x11b, + SURFACE_FORMAT_R8G8_SSCALED = 0x11c, + SURFACE_FORMAT_R8G8_USCALED = 0x11d, + SURFACE_FORMAT_R16_SSCALED = 0x11e, + SURFACE_FORMAT_R16_USCALED = 0x11f, + SURFACE_FORMAT_R8G8_SNORM_DX9 = 0x120, + SURFACE_FORMAT_R16_FLOAT_DX9 = 0x121, + SURFACE_FORMAT_P8A8_UNORM_PALETTE0 = 0x122, + SURFACE_FORMAT_P8A8_UNORM_PALETTE1 = 0x123, + SURFACE_FORMAT_A1B5G5R5_UNORM = 0x124, + SURFACE_FORMAT_A4B4G4R4_UNORM = 0x125, + SURFACE_FORMAT_L8A8_UINT = 0x126, + SURFACE_FORMAT_L8A8_SINT = 0x127, + SURFACE_FORMAT_R8_UNORM = 0x140, + SURFACE_FORMAT_R8_SNORM = 0x141, + SURFACE_FORMAT_R8_SINT = 0x142, + SURFACE_FORMAT_R8_UINT = 0x143, + SURFACE_FORMAT_A8_UNORM = 0x144, + SURFACE_FORMAT_I8_UNORM = 0x145, + SURFACE_FORMAT_L8_UNORM = 0x146, + SURFACE_FORMAT_P4A4_UNORM_PALETTE0 = 0x147, + SURFACE_FORMAT_A4P4_UNORM_PALETTE0 = 0x148, + SURFACE_FORMAT_R8_SSCALED = 0x149, + SURFACE_FORMAT_R8_USCALED = 0x14a, + SURFACE_FORMAT_P8_UNORM_PALETTE0 = 0x14b, + SURFACE_FORMAT_L8_UNORM_SRGB = 0x14c, + SURFACE_FORMAT_P8_UNORM_PALETTE1 = 0x14d, + SURFACE_FORMAT_P4A4_UNORM_PALETTE1 = 0x14e, + SURFACE_FORMAT_A4P4_UNORM_PALETTE1 = 0x14f, + SURFACE_FORMAT_Y8_UNORM = 0x150, + 
SURFACE_FORMAT_L8_UINT = 0x152, + SURFACE_FORMAT_L8_SINT = 0x153, + SURFACE_FORMAT_I8_UINT = 0x154, + SURFACE_FORMAT_I8_SINT = 0x155, + SURFACE_FORMAT_DXT1_RGB_SRGB = 0x180, + SURFACE_FORMAT_R1_UNORM = 0x181, + SURFACE_FORMAT_YCRCB_NORMAL = 0x182, + SURFACE_FORMAT_YCRCB_SWAPUVY = 0x183, + SURFACE_FORMAT_P2_UNORM_PALETTE0 = 0x184, + SURFACE_FORMAT_P2_UNORM_PALETTE1 = 0x185, + SURFACE_FORMAT_BC1_UNORM = 0x186, + SURFACE_FORMAT_BC2_UNORM = 0x187, + SURFACE_FORMAT_BC3_UNORM = 0x188, + SURFACE_FORMAT_BC4_UNORM = 0x189, + SURFACE_FORMAT_BC5_UNORM = 0x18a, + SURFACE_FORMAT_BC1_UNORM_SRGB = 0x18b, + SURFACE_FORMAT_BC2_UNORM_SRGB = 0x18c, + SURFACE_FORMAT_BC3_UNORM_SRGB = 0x18d, + SURFACE_FORMAT_MONO8 = 0x18e, + SURFACE_FORMAT_YCRCB_SWAPUV = 0x18f, + SURFACE_FORMAT_YCRCB_SWAPY = 0x190, + SURFACE_FORMAT_DXT1_RGB = 0x191, + SURFACE_FORMAT_FXT1 = 0x192, + SURFACE_FORMAT_R8G8B8_UNORM = 0x193, + SURFACE_FORMAT_R8G8B8_SNORM = 0x194, + SURFACE_FORMAT_R8G8B8_SSCALED = 0x195, + SURFACE_FORMAT_R8G8B8_USCALED = 0x196, + SURFACE_FORMAT_R64G64B64A64_FLOAT = 0x197, + SURFACE_FORMAT_R64G64B64_FLOAT = 0x198, + SURFACE_FORMAT_BC4_SNORM = 0x199, + SURFACE_FORMAT_BC5_SNORM = 0x19a, + SURFACE_FORMAT_R16G16B16_FLOAT = 0x19b, + SURFACE_FORMAT_R16G16B16_UNORM = 0x19c, + SURFACE_FORMAT_R16G16B16_SNORM = 0x19d, + SURFACE_FORMAT_R16G16B16_SSCALED = 0x19e, + SURFACE_FORMAT_R16G16B16_USCALED = 0x19f, + SURFACE_FORMAT_R8B8_UNORM = 0x1a0, + SURFACE_FORMAT_BC6H_SF16 = 0x1a1, + SURFACE_FORMAT_BC7_UNORM = 0x1a2, + SURFACE_FORMAT_BC7_UNORM_SRGB = 0x1a3, + SURFACE_FORMAT_BC6H_UF16 = 0x1a4, + SURFACE_FORMAT_PLANAR_420_8 = 0x1a5, + SURFACE_FORMAT_PLANAR_420_16 = 0x1a6, + SURFACE_FORMAT_R8G8B8_UNORM_SRGB = 0x1a8, + SURFACE_FORMAT_ETC1_RGB8 = 0x1a9, + SURFACE_FORMAT_ETC2_RGB8 = 0x1aa, + SURFACE_FORMAT_EAC_R11 = 0x1ab, + SURFACE_FORMAT_EAC_RG11 = 0x1ac, + SURFACE_FORMAT_EAC_SIGNED_R11 = 0x1ad, + SURFACE_FORMAT_EAC_SIGNED_RG11 = 0x1ae, + SURFACE_FORMAT_ETC2_SRGB8 = 0x1af, + SURFACE_FORMAT_R16G16B16_UINT = 0x1b0, + 
SURFACE_FORMAT_R16G16B16_SINT = 0x1b1, + SURFACE_FORMAT_R32_SFIXED = 0x1b2, + SURFACE_FORMAT_R10G10B10A2_SNORM = 0x1b3, + SURFACE_FORMAT_R10G10B10A2_USCALED = 0x1b4, + SURFACE_FORMAT_R10G10B10A2_SSCALED = 0x1b5, + SURFACE_FORMAT_R10G10B10A2_SINT = 0x1b6, + SURFACE_FORMAT_B10G10R10A2_SNORM = 0x1b7, + SURFACE_FORMAT_B10G10R10A2_USCALED = 0x1b8, + SURFACE_FORMAT_B10G10R10A2_SSCALED = 0x1b9, + SURFACE_FORMAT_B10G10R10A2_UINT = 0x1ba, + SURFACE_FORMAT_B10G10R10A2_SINT = 0x1bb, + SURFACE_FORMAT_R64G64B64A64_PASSTHRU = 0x1bc, + SURFACE_FORMAT_R64G64B64_PASSTHRU = 0x1bd, + SURFACE_FORMAT_ETC2_RGB8_PTA = 0x1c0, + SURFACE_FORMAT_ETC2_SRGB8_PTA = 0x1c1, + SURFACE_FORMAT_ETC2_EAC_RGBA8 = 0x1c2, + SURFACE_FORMAT_ETC2_EAC_SRGB8_A8 = 0x1c3, + SURFACE_FORMAT_R8G8B8_UINT = 0x1c8, + SURFACE_FORMAT_R8G8B8_SINT = 0x1c9, + SURFACE_FORMAT_RAW = 0x1ff, + } SURFACE_FORMAT; + typedef enum tagSURFACE_TYPE { + SURFACE_TYPE_SURFTYPE_1D = 0x0, + SURFACE_TYPE_SURFTYPE_2D = 0x1, + SURFACE_TYPE_SURFTYPE_3D = 0x2, + SURFACE_TYPE_SURFTYPE_CUBE = 0x3, + SURFACE_TYPE_SURFTYPE_BUFFER = 0x4, + SURFACE_TYPE_SURFTYPE_STRBUF = 0x5, + SURFACE_TYPE_SURFTYPE_SCRATCH = 0x6, + SURFACE_TYPE_SURFTYPE_NULL = 0x7, + } SURFACE_TYPE; + typedef enum tagSAMPLE_TAP_DISCARD_DISABLE { + SAMPLE_TAP_DISCARD_DISABLE_DISABLE = 0x0, + SAMPLE_TAP_DISCARD_DISABLE_ENABLE = 0x1, + } SAMPLE_TAP_DISCARD_DISABLE; + typedef enum tagNULL_PROBING_ENABLE { + NULL_PROBING_ENABLE_DISABLE = 0x0, + NULL_PROBING_ENABLE_ENABLE = 0x1, + } NULL_PROBING_ENABLE; + typedef enum tagNUMBER_OF_MULTISAMPLES { + NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_1 = 0x0, + NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_2 = 0x1, + NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_4 = 0x2, + NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_8 = 0x3, + NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_16 = 0x4, + } NUMBER_OF_MULTISAMPLES; + typedef enum tagMULTISAMPLED_SURFACE_STORAGE_FORMAT { + MULTISAMPLED_SURFACE_STORAGE_FORMAT_MSS = 0x0, + MULTISAMPLED_SURFACE_STORAGE_FORMAT_DEPTH_STENCIL = 0x1, + 
} MULTISAMPLED_SURFACE_STORAGE_FORMAT; + /* Rotation encodings in 90-degree steps; init() selects ..._0DEG. */ typedef enum tagRENDER_TARGET_AND_SAMPLE_UNORM_ROTATION { + RENDER_TARGET_AND_SAMPLE_UNORM_ROTATION_0DEG = 0x0, + RENDER_TARGET_AND_SAMPLE_UNORM_ROTATION_90DEG = 0x1, + RENDER_TARGET_AND_SAMPLE_UNORM_ROTATION_180DEG = 0x2, + RENDER_TARGET_AND_SAMPLE_UNORM_ROTATION_270DEG = 0x3, + } RENDER_TARGET_AND_SAMPLE_UNORM_ROTATION; + /* Values for the CoherencyType field; init() selects COHERENCY_TYPE_GPU_COHERENT. */ typedef enum tagCOHERENCY_TYPE { + COHERENCY_TYPE_GPU_COHERENT = 0x0, + COHERENCY_TYPE_IA_COHERENT = 0x1, + } COHERENCY_TYPE; + /* Note the deliberate aliases: _4KB and _TILEYF share 0x1, _64KB and _TILEYS share 0x2. */ typedef enum tagTILED_RESOURCE_MODE { + TILED_RESOURCE_MODE_NONE = 0x0, + TILED_RESOURCE_MODE_4KB = 0x1, + TILED_RESOURCE_MODE_TILEYF = 0x1, + TILED_RESOURCE_MODE_64KB = 0x2, + TILED_RESOURCE_MODE_TILEYS = 0x2, + } TILED_RESOURCE_MODE; + /* Values for the AuxiliarySurfaceMode field; the encoding is sparse (0x3 has no enumerator). */ typedef enum tagAUXILIARY_SURFACE_MODE { + AUXILIARY_SURFACE_MODE_AUX_NONE = 0x0, + AUXILIARY_SURFACE_MODE_AUX_CCS_D = 0x1, + AUXILIARY_SURFACE_MODE_AUX_APPEND = 0x2, + AUXILIARY_SURFACE_MODE_AUX_MCS_LCE = 0x4, + AUXILIARY_SURFACE_MODE_AUX_CCS_E = 0x5, + } AUXILIARY_SURFACE_MODE; + /* Two-state value for the HalfPitchForChroma field of the planar layout. */ typedef enum tagHALF_PITCH_FOR_CHROMA { + HALF_PITCH_FOR_CHROMA_DISABLE = 0x0, + HALF_PITCH_FOR_CHROMA_ENABLE = 0x1, + } HALF_PITCH_FOR_CHROMA; + /* Shared by the four ShaderChannelSelect{Red,Green,Blue,Alpha} accessors; values 0x2 and 0x3 have no enumerator. */ typedef enum tagSHADER_CHANNEL_SELECT { + SHADER_CHANNEL_SELECT_ZERO = 0x0, + SHADER_CHANNEL_SELECT_ONE = 0x1, + SHADER_CHANNEL_SELECT_RED = 0x4, + SHADER_CHANNEL_SELECT_GREEN = 0x5, + SHADER_CHANNEL_SELECT_BLUE = 0x6, + SHADER_CHANNEL_SELECT_ALPHA = 0x7, + } SHADER_CHANNEL_SELECT; + /* Only one mode is defined here. */ typedef enum tagMEMORY_COMPRESSION_MODE { + MEMORY_COMPRESSION_MODE_HORIZONTAL = 0x0, + } MEMORY_COMPRESSION_MODE; + /* Values for the L1CachePolicyL1CacheControl field; init() selects L1_CACHE_POLICY_WBP. */ typedef enum tagL1_CACHE_POLICY { + L1_CACHE_POLICY_WBP = 0x0, + L1_CACHE_POLICY_UC = 0x1, + L1_CACHE_POLICY_WB = 0x2, + L1_CACHE_POLICY_WT = 0x3, + L1_CACHE_POLICY_WS = 0x4, + } L1_CACHE_POLICY; + /* Resets the state: zero-fills the whole union with memset, then assigns every enumerated field its default explicitly. */ inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.MediaBoundaryPixelMode = MEDIA_BOUNDARY_PIXEL_MODE_NORMAL_MODE; + TheStructure.Common.RenderCacheReadWriteMode =
RENDER_CACHE_READ_WRITE_MODE_WRITE_ONLY_CACHE; + TheStructure.Common.TileMode = TILE_MODE_LINEAR; + TheStructure.Common.SurfaceHorizontalAlignment = SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_16; + TheStructure.Common.SurfaceVerticalAlignment = SURFACE_VERTICAL_ALIGNMENT_VALIGN_4; + TheStructure.Common.SurfaceType = SURFACE_TYPE_SURFTYPE_1D; + TheStructure.Common.SampleTapDiscardDisable = SAMPLE_TAP_DISCARD_DISABLE_DISABLE; + TheStructure.Common.NullProbingEnable = NULL_PROBING_ENABLE_DISABLE; + TheStructure.Common.NumberOfMultisamples = NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_1; + TheStructure.Common.MultisampledSurfaceStorageFormat = MULTISAMPLED_SURFACE_STORAGE_FORMAT_MSS; + TheStructure.Common.RenderTargetAndSampleUnormRotation = RENDER_TARGET_AND_SAMPLE_UNORM_ROTATION_0DEG; + TheStructure.Common.CoherencyType = COHERENCY_TYPE_GPU_COHERENT; + TheStructure.Common.MemoryCompressionMode = MEMORY_COMPRESSION_MODE_HORIZONTAL; + TheStructure._SurfaceFormatIsnotPlanar.AuxiliarySurfaceMode = AUXILIARY_SURFACE_MODE_AUX_NONE; + TheStructure._SurfaceFormatIsPlanar.HalfPitchForChroma = HALF_PITCH_FOR_CHROMA_DISABLE; + /* The two multi-GPU "disable" bits are the only fields init() sets to 1. */ TheStructure.Common.DisableSupportForMultiGpuAtomics = 1; + TheStructure.Common.DisableSupportForMultiGpuPartialWrites = 1; + TheStructure.Common.L1CachePolicyL1CacheControl = L1_CACHE_POLICY::L1_CACHE_POLICY_WBP; + } + /* Returns, by value, a RENDER_SURFACE_STATE on which init() has been called. */ static tagRENDER_SURFACE_STATE sInit(void) { + RENDER_SURFACE_STATE state; + state.init(); + return state; + } + /* Mutable access to dword `index` of the raw view; UNRECOVERABLE_IF fires for index >= 16 (16 dwords match the 64-byte STATIC_ASSERT on the struct). */ inline uint32_t &getRawData(const uint32_t index) { + UNRECOVERABLE_IF(index >= 16); + return TheStructure.RawData[index]; + } + /* Cube-face enable accessors: each setter/getter pair reads or writes one boolean bitfield, with no validation. */ inline void setCubeFaceEnablePositiveZ(const bool value) { + TheStructure.Common.CubeFaceEnablePositiveZ = value; + } + inline bool getCubeFaceEnablePositiveZ(void) const { + return TheStructure.Common.CubeFaceEnablePositiveZ; + } + inline void setCubeFaceEnableNegativeZ(const bool value) { + TheStructure.Common.CubeFaceEnableNegativeZ = value; + } + inline bool getCubeFaceEnableNegativeZ(void) const { + return
TheStructure.Common.CubeFaceEnableNegativeZ; + } + /* Remaining cube-face enables: plain boolean bitfield accessors, no validation. */ inline void setCubeFaceEnablePositiveY(const bool value) { + TheStructure.Common.CubeFaceEnablePositiveY = value; + } + inline bool getCubeFaceEnablePositiveY(void) const { + return TheStructure.Common.CubeFaceEnablePositiveY; + } + inline void setCubeFaceEnableNegativeY(const bool value) { + TheStructure.Common.CubeFaceEnableNegativeY = value; + } + inline bool getCubeFaceEnableNegativeY(void) const { + return TheStructure.Common.CubeFaceEnableNegativeY; + } + inline void setCubeFaceEnablePositiveX(const bool value) { + TheStructure.Common.CubeFaceEnablePositiveX = value; + } + inline bool getCubeFaceEnablePositiveX(void) const { + return TheStructure.Common.CubeFaceEnablePositiveX; + } + inline void setCubeFaceEnableNegativeX(const bool value) { + TheStructure.Common.CubeFaceEnableNegativeX = value; + } + inline bool getCubeFaceEnableNegativeX(void) const { + return TheStructure.Common.CubeFaceEnableNegativeX; + } + /* Enum-typed accessors: the setter stores the enumerator in the bitfield, the getter casts the raw bits back to the enum type. */ inline void setMediaBoundaryPixelMode(const MEDIA_BOUNDARY_PIXEL_MODE value) { + TheStructure.Common.MediaBoundaryPixelMode = value; + } + inline MEDIA_BOUNDARY_PIXEL_MODE getMediaBoundaryPixelMode(void) const { + return static_cast<MEDIA_BOUNDARY_PIXEL_MODE>(TheStructure.Common.MediaBoundaryPixelMode); + } + inline void setRenderCacheReadWriteMode(const RENDER_CACHE_READ_WRITE_MODE value) { + TheStructure.Common.RenderCacheReadWriteMode = value; + } + inline RENDER_CACHE_READ_WRITE_MODE getRenderCacheReadWriteMode(void) const { + return static_cast<RENDER_CACHE_READ_WRITE_MODE>(TheStructure.Common.RenderCacheReadWriteMode); + } + inline void setSamplerL2OutOfOrderModeDisable(const bool value) { + TheStructure.Common.SamplerL2OutOfOrderModeDisable = value; + } + inline bool getSamplerL2OutOfOrderModeDisable(void) const { + return TheStructure.Common.SamplerL2OutOfOrderModeDisable; + } + inline void setVerticalLineStrideOffset(const bool value) { + TheStructure.Common.VerticalLineStrideOffset = value; + } + inline bool getVerticalLineStrideOffset(void) const
{ + return TheStructure.Common.VerticalLineStrideOffset; + } + inline void setVerticalLineStride(const bool value) { + TheStructure.Common.VerticalLineStride = value; + } + inline bool getVerticalLineStride(void) const { + return TheStructure.Common.VerticalLineStride; + } + /* Enum-typed accessors: the setter stores the enumerator in the bitfield, the getter casts the raw bits back to the enum type. */ inline void setTileMode(const TILE_MODE value) { + TheStructure.Common.TileMode = value; + } + inline TILE_MODE getTileMode(void) const { + return static_cast<TILE_MODE>(TheStructure.Common.TileMode); + } + inline void setSurfaceHorizontalAlignment(const SURFACE_HORIZONTAL_ALIGNMENT value) { + TheStructure.Common.SurfaceHorizontalAlignment = value; + } + inline SURFACE_HORIZONTAL_ALIGNMENT getSurfaceHorizontalAlignment(void) const { + return static_cast<SURFACE_HORIZONTAL_ALIGNMENT>(TheStructure.Common.SurfaceHorizontalAlignment); + } + inline void setSurfaceVerticalAlignment(const SURFACE_VERTICAL_ALIGNMENT value) { + TheStructure.Common.SurfaceVerticalAlignment = value; + } + inline SURFACE_VERTICAL_ALIGNMENT getSurfaceVerticalAlignment(void) const { + return static_cast<SURFACE_VERTICAL_ALIGNMENT>(TheStructure.Common.SurfaceVerticalAlignment); + } + inline void setSurfaceFormat(const SURFACE_FORMAT value) { + TheStructure.Common.SurfaceFormat = value; + } + inline SURFACE_FORMAT getSurfaceFormat(void) const { + return static_cast<SURFACE_FORMAT>(TheStructure.Common.SurfaceFormat); + } + inline void setAstcEnable(const bool value) { + TheStructure.Common.AstcEnable = value; + } + inline bool getAstcEnable(void) const { + return TheStructure.Common.AstcEnable; + } + inline void setSurfaceArray(const bool value) { + TheStructure.Common.SurfaceArray = value; + } + inline bool getSurfaceArray(void) const { + return TheStructure.Common.SurfaceArray; + } + inline void setSurfaceType(const SURFACE_TYPE value) { + TheStructure.Common.SurfaceType = value; + } + inline SURFACE_TYPE getSurfaceType(void) const { + return static_cast<SURFACE_TYPE>(TheStructure.Common.SurfaceType); + } + /* Shift and alignment constants used by setSurfaceQpitch()/getSurfaceQpitch(): ALIGN_SIZE == 1 << BIT_SHIFT. */ typedef enum tagSURFACEQPITCH { + SURFACEQPITCH_BIT_SHIFT = 0x2, + SURFACEQPITCH_ALIGN_SIZE = 0x4, + } SURFACEQPITCH;
+ /* Stores value >> SURFACEQPITCH_BIT_SHIFT, so the two low bits of `value` are dropped; the getter shifts the field back up. */ inline void setSurfaceQpitch(const uint32_t value) { + DEBUG_BREAK_IF(value > 0x7fff); + TheStructure.Common.SurfaceQpitch = value >> SURFACEQPITCH_BIT_SHIFT; + } + inline uint32_t getSurfaceQpitch(void) const { + return TheStructure.Common.SurfaceQpitch << SURFACEQPITCH_BIT_SHIFT; + } + inline void setSampleTapDiscardDisable(const SAMPLE_TAP_DISCARD_DISABLE value) { + TheStructure.Common.SampleTapDiscardDisable = value; + } + inline SAMPLE_TAP_DISCARD_DISABLE getSampleTapDiscardDisable(void) const { + return static_cast<SAMPLE_TAP_DISCARD_DISABLE>(TheStructure.Common.SampleTapDiscardDisable); + } + inline void setDoubleFetchDisable(const bool value) { + TheStructure.Common.DoubleFetchDisable = value; + } + inline bool getDoubleFetchDisable(void) const { + return TheStructure.Common.DoubleFetchDisable; + } + inline void setCornerTexelMode(const bool value) { + TheStructure.Common.CornerTexelMode = value; + } + inline bool getCornerTexelMode(void) const { + return TheStructure.Common.CornerTexelMode; + } + /* NOTE(review): the limit 0xf80000 looks like an in-place bit mask rather than the largest field value; the bitfield declaration is outside this chunk - confirm before tightening. */ inline void setBaseMipLevel(const uint32_t value) { + UNRECOVERABLE_IF(value > 0xf80000); + TheStructure.Common.BaseMipLevel = value; + } + inline uint32_t getBaseMipLevel(void) const { + return TheStructure.Common.BaseMipLevel; + } + /* Stores value >> 1; the getter shifts left by 1, so bit 0 of `value` does not round-trip through this pair. */ inline void setMemoryObjectControlStateIndexToMocsTables(const uint32_t value) { + DEBUG_BREAK_IF(value > 0x7e000000L); + TheStructure.Common.MemoryObjectControlStateIndexToMocsTables = value >> 1; + } + inline uint32_t getMemoryObjectControlStateIndexToMocsTables(void) const { + return (TheStructure.Common.MemoryObjectControlStateIndexToMocsTables << 1); + } + /* Writes the combined value into both sub-fields: `value` is assigned to the EncryptedData bitfield and value >> 1 to the MOCS index; the getter ORs them back together. */ inline void setMemoryObjectControlState(const uint32_t value) { + TheStructure.Common.MemoryObjectControlStateEncryptedData = value; + TheStructure.Common.MemoryObjectControlStateIndexToMocsTables = (value >> 1); + } + inline uint32_t getMemoryObjectControlState(void) const { + uint32_t mocs = TheStructure.Common.MemoryObjectControlStateEncryptedData; + mocs |=
(TheStructure.Common.MemoryObjectControlStateIndexToMocsTables << 1); + return (mocs); + } + inline void setEnableUnormPathInColorPipe(const bool value) { + TheStructure.Common.EnableUnormPathInColorPipe = value; + } + inline bool getEnableUnormPathInColorPipe(void) const { + return TheStructure.Common.EnableUnormPathInColorPipe; + } + inline void setWidth(const uint32_t value) { + UNRECOVERABLE_IF(value > (0x3fff + 1)); + TheStructure.Common.Width = value - 1; + } + inline uint32_t getWidth(void) const { + return TheStructure.Common.Width + 1; + } + inline void setHeight(const uint32_t value) { + UNRECOVERABLE_IF(value > (0x3fff0000 + 1)); + TheStructure.Common.Height = value - 1; + } + inline uint32_t getHeight(void) const { + return TheStructure.Common.Height + 1; + } + inline void setDepthStencilResource(const bool value) { + TheStructure.Common.DepthStencilResource = value; + } + inline bool getDepthStencilResource(void) const { + return TheStructure.Common.DepthStencilResource; + } + inline void setSurfacePitch(const uint32_t value) { + UNRECOVERABLE_IF(value > (0x3ffff + 1)); + TheStructure.Common.SurfacePitch = value - 1; + } + inline uint32_t getSurfacePitch(void) const { + return TheStructure.Common.SurfacePitch + 1; + } + inline void setNullProbingEnable(const NULL_PROBING_ENABLE value) { + TheStructure.Common.NullProbingEnable = value; + } + inline NULL_PROBING_ENABLE getNullProbingEnable(void) const { + return static_cast(TheStructure.Common.NullProbingEnable); + } + inline void setDepth(const uint32_t value) { + UNRECOVERABLE_IF(value > (0xffe00000 + 1)); + TheStructure.Common.Depth = value - 1; + } + inline uint32_t getDepth(void) const { + return TheStructure.Common.Depth + 1; + } + inline void setMultisamplePositionPaletteIndex(const uint32_t value) { + UNRECOVERABLE_IF(value > 0x7); + TheStructure.Common.MultisamplePositionPaletteIndex = value; + } + inline uint32_t getMultisamplePositionPaletteIndex(void) const { + return 
TheStructure.Common.MultisamplePositionPaletteIndex; + } + /* Enum-typed accessors: the setter stores the enumerator in the bitfield, the getter casts the raw bits back to the enum type. */ inline void setNumberOfMultisamples(const NUMBER_OF_MULTISAMPLES value) { + TheStructure.Common.NumberOfMultisamples = value; + } + inline NUMBER_OF_MULTISAMPLES getNumberOfMultisamples(void) const { + return static_cast<NUMBER_OF_MULTISAMPLES>(TheStructure.Common.NumberOfMultisamples); + } + inline void setMultisampledSurfaceStorageFormat(const MULTISAMPLED_SURFACE_STORAGE_FORMAT value) { + TheStructure.Common.MultisampledSurfaceStorageFormat = value; + } + inline MULTISAMPLED_SURFACE_STORAGE_FORMAT getMultisampledSurfaceStorageFormat(void) const { + return static_cast<MULTISAMPLED_SURFACE_STORAGE_FORMAT>(TheStructure.Common.MultisampledSurfaceStorageFormat); + } + /* Stored minus one, like Width/Height: the setter writes value - 1 and the getter adds 1. NOTE(review): 0x3ff80 looks like an in-place bit mask - confirm against the bitfield declaration. */ inline void setRenderTargetViewExtent(const uint32_t value) { + UNRECOVERABLE_IF(value > (0x3ff80 + 1)); + TheStructure.Common.RenderTargetViewExtent = value - 1; + } + inline uint32_t getRenderTargetViewExtent(void) const { + return TheStructure.Common.RenderTargetViewExtent + 1; + } + /* NOTE(review): 0x1ffc0000 looks like an in-place bit mask rather than the largest field value - confirm. */ inline void setMinimumArrayElement(const uint32_t value) { + UNRECOVERABLE_IF(value > 0x1ffc0000); + TheStructure.Common.MinimumArrayElement = value; + } + inline uint32_t getMinimumArrayElement(void) const { + return TheStructure.Common.MinimumArrayElement; + } + inline void setRenderTargetAndSampleUnormRotation(const RENDER_TARGET_AND_SAMPLE_UNORM_ROTATION value) { + TheStructure.Common.RenderTargetAndSampleUnormRotation = value; + } + inline RENDER_TARGET_AND_SAMPLE_UNORM_ROTATION getRenderTargetAndSampleUnormRotation(void) const { + return static_cast<RENDER_TARGET_AND_SAMPLE_UNORM_ROTATION>(TheStructure.Common.RenderTargetAndSampleUnormRotation); + } + inline void setMipCountLod(const uint32_t value) { + UNRECOVERABLE_IF(value > 0xf); + TheStructure.Common.MipCountLod = value; + } + inline uint32_t getMipCountLod(void) const { + return TheStructure.Common.MipCountLod; + } + /* NOTE(review): 0xf0 looks like an in-place bit mask rather than the largest field value - confirm. */ inline void setSurfaceMinLod(const uint32_t value) { + UNRECOVERABLE_IF(value > 0xf0); + TheStructure.Common.SurfaceMinLod = value; + } + inline uint32_t getSurfaceMinLod(void) const { +
return TheStructure.Common.SurfaceMinLod; + } + /* NOTE(review): 0xf00 looks like an in-place bit mask rather than the largest field value; the bitfield declaration is outside this chunk - confirm. */ inline void setMipTailStartLod(const uint32_t value) { + UNRECOVERABLE_IF(value > 0xf00); + TheStructure.Common.MipTailStartLod = value; + } + inline uint32_t getMipTailStartLod(void) const { + return TheStructure.Common.MipTailStartLod; + } + inline void setCoherencyType(const COHERENCY_TYPE value) { + TheStructure.Common.CoherencyType = value; + } + inline COHERENCY_TYPE getCoherencyType(void) const { + return static_cast<COHERENCY_TYPE>(TheStructure.Common.CoherencyType); + } + /* Unlike the other enumerated fields, this pair is typed uint32_t; L1_CACHE_POLICY lists the named values. */ inline void setL1CachePolicyL1CacheControl(const uint32_t value) { + TheStructure.Common.L1CachePolicyL1CacheControl = value; + } + inline uint32_t getL1CachePolicyL1CacheControl(void) const { + return TheStructure.Common.L1CachePolicyL1CacheControl; + } + inline void setEwaDisableForCube(const bool value) { + TheStructure.Common.EwaDisableForCube = value; + } + inline bool getEwaDisableForCube(void) const { + return TheStructure.Common.EwaDisableForCube; + } + /* Shift and alignment constants for setYOffset()/getYOffset(): ALIGN_SIZE == 1 << BIT_SHIFT. */ typedef enum tagYOFFSET { + YOFFSET_BIT_SHIFT = 0x2, + YOFFSET_ALIGN_SIZE = 0x4, + } YOFFSET; + /* Stores value >> YOFFSET_BIT_SHIFT (the two low bits are dropped); the getter shifts back. */ inline void setYOffset(const uint32_t value) { + UNRECOVERABLE_IF(value > 0xe00000); + TheStructure.Common.YOffset = value >> YOFFSET_BIT_SHIFT; + } + inline uint32_t getYOffset(void) const { + return TheStructure.Common.YOffset << YOFFSET_BIT_SHIFT; + } + /* Shift and alignment constants for setXOffset()/getXOffset(). */ typedef enum tagXOFFSET { + XOFFSET_BIT_SHIFT = 0x2, + XOFFSET_ALIGN_SIZE = 0x4, + } XOFFSET; + /* Stores value >> XOFFSET_BIT_SHIFT (the two low bits are dropped); the getter shifts back. */ inline void setXOffset(const uint32_t value) { + UNRECOVERABLE_IF(value > 0xfe000000); + TheStructure.Common.XOffset = value >> XOFFSET_BIT_SHIFT; + } + inline uint32_t getXOffset(void) const { + return TheStructure.Common.XOffset << XOFFSET_BIT_SHIFT; + } + inline void setResourceMinLod(const uint32_t value) { + UNRECOVERABLE_IF(value > 0xfff); + TheStructure.Common.ResourceMinLod = value; + } + inline uint32_t getResourceMinLod(void) const { + return TheStructure.Common.ResourceMinLod; + } + inline void setDisableSupportForMultiGpuAtomics(const bool value) {
+ TheStructure.Common.DisableSupportForMultiGpuAtomics = value; + } + inline bool getDisableSupportForMultiGpuAtomics(void) const { + return TheStructure.Common.DisableSupportForMultiGpuAtomics; + } + inline void setDisableSupportForMultiGpuPartialWrites(const bool value) { + TheStructure.Common.DisableSupportForMultiGpuPartialWrites = value; + } + inline bool getDisableSupportForMultiGpuPartialWrites(void) const { + return TheStructure.Common.DisableSupportForMultiGpuPartialWrites; + } + /* Per-channel selectors: all four pairs share the SHADER_CHANNEL_SELECT enum; each getter casts the raw bitfield back to that enum. */ inline void setShaderChannelSelectAlpha(const SHADER_CHANNEL_SELECT value) { + TheStructure.Common.ShaderChannelSelectAlpha = value; + } + inline SHADER_CHANNEL_SELECT getShaderChannelSelectAlpha(void) const { + return static_cast<SHADER_CHANNEL_SELECT>(TheStructure.Common.ShaderChannelSelectAlpha); + } + inline void setShaderChannelSelectBlue(const SHADER_CHANNEL_SELECT value) { + TheStructure.Common.ShaderChannelSelectBlue = value; + } + inline SHADER_CHANNEL_SELECT getShaderChannelSelectBlue(void) const { + return static_cast<SHADER_CHANNEL_SELECT>(TheStructure.Common.ShaderChannelSelectBlue); + } + inline void setShaderChannelSelectGreen(const SHADER_CHANNEL_SELECT value) { + TheStructure.Common.ShaderChannelSelectGreen = value; + } + inline SHADER_CHANNEL_SELECT getShaderChannelSelectGreen(void) const { + return static_cast<SHADER_CHANNEL_SELECT>(TheStructure.Common.ShaderChannelSelectGreen); + } + inline void setShaderChannelSelectRed(const SHADER_CHANNEL_SELECT value) { + TheStructure.Common.ShaderChannelSelectRed = value; + } + inline SHADER_CHANNEL_SELECT getShaderChannelSelectRed(void) const { + return static_cast<SHADER_CHANNEL_SELECT>(TheStructure.Common.ShaderChannelSelectRed); + } + inline void setMemoryCompressionEnable(const bool value) { + TheStructure.Common.MemoryCompressionEnable = value; + } + inline bool getMemoryCompressionEnable(void) const { + return TheStructure.Common.MemoryCompressionEnable; + } + inline void setMemoryCompressionMode(const MEMORY_COMPRESSION_MODE value) { + TheStructure.Common.MemoryCompressionMode = value; + } + inline
MEMORY_COMPRESSION_MODE getMemoryCompressionMode(void) const { + return static_cast<MEMORY_COMPRESSION_MODE>(TheStructure.Common.MemoryCompressionMode); + } + /* Rejects values above 0x1F via UNRECOVERABLE_IF, then stores the value unchanged. */ inline void setCompressionFormat(uint32_t compressionFormat) { + UNRECOVERABLE_IF(compressionFormat > 0x1F); + TheStructure.Common.CompressionFormat = compressionFormat; + } + inline uint32_t getCompressionFormat(void) const { + return TheStructure.Common.CompressionFormat; + } + /* Stored and returned as-is: no shift and no range check. */ inline void setSurfaceBaseAddress(const uint64_t value) { + TheStructure.Common.SurfaceBaseAddress = value; + } + inline uint64_t getSurfaceBaseAddress(void) const { + return TheStructure.Common.SurfaceBaseAddress; + } + inline void setQuiltWidth(const uint64_t value) { + UNRECOVERABLE_IF(value > 0x1fL); + TheStructure.Common.QuiltWidth = value; + } + inline uint64_t getQuiltWidth(void) const { + return TheStructure.Common.QuiltWidth; + } + /* NOTE(review): 0x3e0 equals 0x1f << 5, which looks like an in-place bit mask rather than the largest field value; the bitfield declaration is outside this chunk - confirm. */ inline void setQuiltHeight(const uint64_t value) { + UNRECOVERABLE_IF(value > 0x3e0L); + TheStructure.Common.QuiltHeight = value; + } + inline uint64_t getQuiltHeight(void) const { + return TheStructure.Common.QuiltHeight; + } + inline void setClearValueAddressEnable(const bool value) { + TheStructure.Common.ClearValueAddressEnable = value; + } + inline bool getClearValueAddressEnable(void) const { + return TheStructure.Common.ClearValueAddressEnable; + } + inline void setProceduralTexture(const bool value) { + TheStructure.Common.ProceduralTexture = value; + } + inline bool getProceduralTexture(void) const { + return TheStructure.Common.ProceduralTexture; + } + /* Shift and alignment constants for setClearColorAddress(): ALIGN_SIZE (0x40) == 1 << BIT_SHIFT (6). */ typedef enum tagCLEARADDRESSLOW { + CLEARADDRESSLOW_BIT_SHIFT = 0x6, + CLEARADDRESSLOW_ALIGN_SIZE = 0x40, + } CLEARADDRESSLOW; + /* Stores value >> CLEARADDRESSLOW_BIT_SHIFT (the six low bits are dropped); the getter shifts back. */ inline void setClearColorAddress(const uint32_t value) { + UNRECOVERABLE_IF(value > 0xffffffc0); + TheStructure.Common.ClearColorAddress = value >> CLEARADDRESSLOW_BIT_SHIFT; + } + inline uint32_t getClearColorAddress(void) const { + return TheStructure.Common.ClearColorAddress << CLEARADDRESSLOW_BIT_SHIFT; + } + inline void
setClearColorAddressHigh(const uint32_t value) { + UNRECOVERABLE_IF(value > 0xffff); + TheStructure.Common.ClearColorAddressHigh = value; + } + inline uint32_t getClearColorAddressHigh(void) const { + return TheStructure.Common.ClearColorAddressHigh; + } + /* "Flitering" is the spelling used by both the accessor names and the bitfield; kept as-is because it is part of the interface. */ inline void setDisallowLowQualityFlitering(const bool value) { + TheStructure.Common.DisallowLowQualityFlitering = value; + } + inline bool getDisallowLowQualityFlitering(void) const { + return TheStructure.Common.DisallowLowQualityFlitering; + } + /* The accessors below use the _SurfaceFormatIsnotPlanar / _SurfaceFormatIsPlanar views of the union instead of Common. */ inline void setAuxiliarySurfaceMode(const AUXILIARY_SURFACE_MODE value) { + TheStructure._SurfaceFormatIsnotPlanar.AuxiliarySurfaceMode = value; + } + inline AUXILIARY_SURFACE_MODE getAuxiliarySurfaceMode(void) const { + return static_cast<AUXILIARY_SURFACE_MODE>(TheStructure._SurfaceFormatIsnotPlanar.AuxiliarySurfaceMode); + } + /* Stored minus one: the setter writes value - 1 and the getter adds 1. */ inline void setAuxiliarySurfacePitch(const uint32_t value) { + UNRECOVERABLE_IF(value > (0x1ff8 + 1)); + TheStructure._SurfaceFormatIsnotPlanar.AuxiliarySurfacePitch = value - 1; + } + inline uint32_t getAuxiliarySurfacePitch(void) const { + return TheStructure._SurfaceFormatIsnotPlanar.AuxiliarySurfacePitch + 1; + } + /* Shift and alignment constants for setAuxiliarySurfaceQpitch(): ALIGN_SIZE == 1 << BIT_SHIFT. */ typedef enum tagAUXILIARYSURFACEQPITCH { + AUXILIARYSURFACEQPITCH_BIT_SHIFT = 0x2, + AUXILIARYSURFACEQPITCH_ALIGN_SIZE = 0x4, + } AUXILIARYSURFACEQPITCH; + /* Stores value >> AUXILIARYSURFACEQPITCH_BIT_SHIFT; the getter shifts back. */ inline void setAuxiliarySurfaceQpitch(const uint32_t value) { + UNRECOVERABLE_IF(value > 0x7fff0000); + TheStructure._SurfaceFormatIsnotPlanar.AuxiliarySurfaceQpitch = value >> AUXILIARYSURFACEQPITCH_BIT_SHIFT; + } + inline uint32_t getAuxiliarySurfaceQpitch(void) const { + return TheStructure._SurfaceFormatIsnotPlanar.AuxiliarySurfaceQpitch << AUXILIARYSURFACEQPITCH_BIT_SHIFT; + } + inline void setYOffsetForUOrUvPlane(const uint32_t value) { + UNRECOVERABLE_IF(value > 0x3fff); + TheStructure._SurfaceFormatIsPlanar.YOffsetForUOrUvPlane = value; + } + inline uint32_t getYOffsetForUOrUvPlane(void) const { + return TheStructure._SurfaceFormatIsPlanar.YOffsetForUOrUvPlane; + } + inline void
setXOffsetForUOrUvPlane(const uint32_t value) { + UNRECOVERABLE_IF(value > 0x3fff0000); + TheStructure._SurfaceFormatIsPlanar.XOffsetForUOrUvPlane = value; + } + inline uint32_t getXOffsetForUOrUvPlane(void) const { + return TheStructure._SurfaceFormatIsPlanar.XOffsetForUOrUvPlane; + } + /* Planar-layout accessors: these read and write the _SurfaceFormatIsPlanar view of the union. */ inline void setHalfPitchForChroma(const HALF_PITCH_FOR_CHROMA value) { + TheStructure._SurfaceFormatIsPlanar.HalfPitchForChroma = value; + } + inline HALF_PITCH_FOR_CHROMA getHalfPitchForChroma(void) const { + return static_cast<HALF_PITCH_FOR_CHROMA>(TheStructure._SurfaceFormatIsPlanar.HalfPitchForChroma); + } + inline void setSeparateUvPlaneEnable(const bool value) { + TheStructure._SurfaceFormatIsPlanar.SeparateUvPlaneEnable = value; + } + inline bool getSeparateUvPlaneEnable(void) const { + return TheStructure._SurfaceFormatIsPlanar.SeparateUvPlaneEnable; + } + /* NOTE(review): the V-plane limits below (0x3fff00000000, 0x3fff000000000000) look like in-place bit masks rather than the largest field values; the bitfield declaration is outside this chunk - confirm. */ inline void setYOffsetForVPlane(const uint64_t value) { + UNRECOVERABLE_IF(value > 0x3fff00000000L); + TheStructure._SurfaceFormatIsPlanar.YOffsetForVPlane = value; + } + inline uint64_t getYOffsetForVPlane(void) const { + return TheStructure._SurfaceFormatIsPlanar.YOffsetForVPlane; + } + inline void setXOffsetForVPlane(const uint64_t value) { + UNRECOVERABLE_IF(value > 0x3fff000000000000L); + TheStructure._SurfaceFormatIsPlanar.XOffsetForVPlane = value; + } + inline uint64_t getXOffsetForVPlane(void) const { + return TheStructure._SurfaceFormatIsPlanar.XOffsetForVPlane; + } + /* Shift and alignment constants for the auxiliary surface base address: ALIGN_SIZE (0x1000) == 1 << BIT_SHIFT (12). */ typedef enum tagAUXILIARYSURFACEBASEADDRESS { + AUXILIARYSURFACEBASEADDRESS_BIT_SHIFT = 0xc, + AUXILIARYSURFACEBASEADDRESS_ALIGN_SIZE = 0x1000, + } AUXILIARYSURFACEBASEADDRESS; + /* Stores value >> AUXILIARYSURFACEBASEADDRESS_BIT_SHIFT (the twelve low bits are dropped); the getter shifts back. */ inline void setAuxiliarySurfaceBaseAddress(const uint64_t value) { + UNRECOVERABLE_IF(value > 0xfffffffffffff000L); + TheStructure._SurfaceFormatIsnotPlanarAndMemoryCompressionEnableIs0.AuxiliarySurfaceBaseAddress = value >> AUXILIARYSURFACEBASEADDRESS_BIT_SHIFT; + } + inline uint64_t getAuxiliarySurfaceBaseAddress(void) const { + return
TheStructure._SurfaceFormatIsnotPlanarAndMemoryCompressionEnableIs0.AuxiliarySurfaceBaseAddress << AUXILIARYSURFACEBASEADDRESS_BIT_SHIFT; + } +} RENDER_SURFACE_STATE; +/* Compile-time layout guard: the structure must be exactly 64 bytes. */ STATIC_ASSERT(64 == sizeof(RENDER_SURFACE_STATE)); + +/* SAMPLER_STATE: a union of a bitfield view (Common) and a raw dword view (RawData). */ typedef struct tagSAMPLER_STATE { + union tagTheStructure { + /* BITFIELD_RANGE(first, last) positions restart at 0 four times below, once per 32-bit dword of the structure. */ struct tagCommon { + uint32_t LodAlgorithm : BITFIELD_RANGE(0, 0); + uint32_t TextureLodBias : BITFIELD_RANGE(1, 13); + uint32_t MinModeFilter : BITFIELD_RANGE(14, 16); + uint32_t MagModeFilter : BITFIELD_RANGE(17, 19); + uint32_t MipModeFilter : BITFIELD_RANGE(20, 21); + uint32_t CoarseLodQualityMode : BITFIELD_RANGE(22, 26); + uint32_t LodPreclampMode : BITFIELD_RANGE(27, 28); + uint32_t TextureBorderColorMode : BITFIELD_RANGE(29, 29); + uint32_t CpsLodCompensationEnable : BITFIELD_RANGE(30, 30); + uint32_t SamplerDisable : BITFIELD_RANGE(31, 31); + uint32_t CubeSurfaceControlMode : BITFIELD_RANGE(0, 0); + uint32_t ShadowFunction : BITFIELD_RANGE(1, 3); + uint32_t ChromakeyMode : BITFIELD_RANGE(4, 4); + uint32_t ChromakeyIndex : BITFIELD_RANGE(5, 6); + uint32_t ChromakeyEnable : BITFIELD_RANGE(7, 7); + uint32_t MaxLod : BITFIELD_RANGE(8, 19); + uint32_t MinLod : BITFIELD_RANGE(20, 31); + uint32_t LodClampMagnificationMode : BITFIELD_RANGE(0, 0); + uint32_t SrgbDecode : BITFIELD_RANGE(1, 1); + uint32_t ReturnFilterWeightForNullTexels : BITFIELD_RANGE(2, 2); + uint32_t ReturnFilterWeightForBorderTexels : BITFIELD_RANGE(3, 3); + uint32_t Reserved_68 : BITFIELD_RANGE(4, 5); + uint32_t IndirectStatePointer : BITFIELD_RANGE(6, 23); + uint32_t Reserved_88 : BITFIELD_RANGE(24, 31); + uint32_t TczAddressControlMode : BITFIELD_RANGE(0, 2); + uint32_t TcyAddressControlMode : BITFIELD_RANGE(3, 5); + uint32_t TcxAddressControlMode : BITFIELD_RANGE(6, 8); + uint32_t ReductionTypeEnable : BITFIELD_RANGE(9, 9); + uint32_t NonNormalizedCoordinateEnable : BITFIELD_RANGE(10, 10); + uint32_t TrilinearFilterQuality : BITFIELD_RANGE(11, 12); + uint32_t RAddressMinFilterRoundingEnable : BITFIELD_RANGE(13,
13); + uint32_t RAddressMagFilterRoundingEnable : BITFIELD_RANGE(14, 14); + uint32_t VAddressMinFilterRoundingEnable : BITFIELD_RANGE(15, 15); + uint32_t VAddressMagFilterRoundingEnable : BITFIELD_RANGE(16, 16); + uint32_t UAddressMinFilterRoundingEnable : BITFIELD_RANGE(17, 17); + uint32_t UAddressMagFilterRoundingEnable : BITFIELD_RANGE(18, 18); + uint32_t MaximumAnisotropy : BITFIELD_RANGE(19, 21); + uint32_t ReductionType : BITFIELD_RANGE(22, 23); + uint32_t AllowLowQualityLodCalculation : BITFIELD_RANGE(24, 24); + uint32_t Reserved_121 : BITFIELD_RANGE(25, 25); + uint32_t LowQualityFilter : BITFIELD_RANGE(26, 26); + uint32_t Reserved_123 : BITFIELD_RANGE(27, 31); + } Common; + /* Raw view of the same storage as four 32-bit dwords; exposed through getRawData(). */ uint32_t RawData[4]; + } TheStructure; + /* Values for the LodAlgorithm field; init() selects LOD_ALGORITHM_LEGACY. */ typedef enum tagLOD_ALGORITHM { + LOD_ALGORITHM_LEGACY = 0x0, + LOD_ALGORITHM_EWA_APPROXIMATION = 0x1, + } LOD_ALGORITHM; + /* MIN_MODE_FILTER and MAG_MODE_FILTER use the same numeric encodings (0x0, 0x1, 0x2, 0x6). */ typedef enum tagMIN_MODE_FILTER { + MIN_MODE_FILTER_NEAREST = 0x0, + MIN_MODE_FILTER_LINEAR = 0x1, + MIN_MODE_FILTER_ANISOTROPIC = 0x2, + MIN_MODE_FILTER_MONO = 0x6, + } MIN_MODE_FILTER; + typedef enum tagMAG_MODE_FILTER { + MAG_MODE_FILTER_NEAREST = 0x0, + MAG_MODE_FILTER_LINEAR = 0x1, + MAG_MODE_FILTER_ANISOTROPIC = 0x2, + MAG_MODE_FILTER_MONO = 0x6, + } MAG_MODE_FILTER; + /* Sparse encoding: 0x2 has no enumerator. */ typedef enum tagMIP_MODE_FILTER { + MIP_MODE_FILTER_NONE = 0x0, + MIP_MODE_FILTER_NEAREST = 0x1, + MIP_MODE_FILTER_LINEAR = 0x3, + } MIP_MODE_FILTER; + /* Only the DISABLED value is named here. */ typedef enum tagCOARSE_LOD_QUALITY_MODE { + COARSE_LOD_QUALITY_MODE_DISABLED = 0x0, + } COARSE_LOD_QUALITY_MODE; + typedef enum tagLOD_PRECLAMP_MODE { + LOD_PRECLAMP_MODE_NONE = 0x0, + LOD_PRECLAMP_MODE_OGL = 0x2, + } LOD_PRECLAMP_MODE; + typedef enum tagTEXTURE_BORDER_COLOR_MODE { + TEXTURE_BORDER_COLOR_MODE_OGL = 0x0, + TEXTURE_BORDER_COLOR_MODE_8BIT = 0x1, + } TEXTURE_BORDER_COLOR_MODE; + typedef enum tagCUBE_SURFACE_CONTROL_MODE { + CUBE_SURFACE_CONTROL_MODE_PROGRAMMED = 0x0, + CUBE_SURFACE_CONTROL_MODE_OVERRIDE = 0x1, + } CUBE_SURFACE_CONTROL_MODE; + /* Values for the 3-bit ShadowFunction field (bits 1..3 of the second dword); all eight encodings are named. */ typedef enum tagSHADOW_FUNCTION { +
SHADOW_FUNCTION_PREFILTEROP_ALWAYS = 0x0, + SHADOW_FUNCTION_PREFILTEROP_NEVER = 0x1, + SHADOW_FUNCTION_PREFILTEROP_LESS = 0x2, + SHADOW_FUNCTION_PREFILTEROP_EQUAL = 0x3, + SHADOW_FUNCTION_PREFILTEROP_LEQUAL = 0x4, + SHADOW_FUNCTION_PREFILTEROP_GREATER = 0x5, + SHADOW_FUNCTION_PREFILTEROP_NOTEQUAL = 0x6, + SHADOW_FUNCTION_PREFILTEROP_GEQUAL = 0x7, + } SHADOW_FUNCTION; + /* Values for the one-bit ChromakeyMode field; init() selects ..._KILL_ON_ANY_MATCH. */ typedef enum tagCHROMAKEY_MODE { + CHROMAKEY_MODE_KEYFILTER_KILL_ON_ANY_MATCH = 0x0, + CHROMAKEY_MODE_KEYFILTER_REPLACE_BLACK = 0x1, + } CHROMAKEY_MODE; + /* Values for the one-bit LodClampMagnificationMode field; init() selects ..._MIPNONE. */ typedef enum tagLOD_CLAMP_MAGNIFICATION_MODE { + LOD_CLAMP_MAGNIFICATION_MODE_MIPNONE = 0x0, + LOD_CLAMP_MAGNIFICATION_MODE_MIPFILTER = 0x1, + } LOD_CLAMP_MAGNIFICATION_MODE; + /* Values for the one-bit SrgbDecode field; init() selects SRGB_DECODE_DECODE_EXT. */ typedef enum tagSRGB_DECODE { + SRGB_DECODE_DECODE_EXT = 0x0, + SRGB_DECODE_SKIP_DECODE_EXT = 0x1, + } SRGB_DECODE; + /* Two-state values for the ReturnFilterWeightFor{Null,Border}Texels fields; init() selects DISABLE for both. */ typedef enum tagRETURN_FILTER_WEIGHT_FOR_NULL_TEXELS { + RETURN_FILTER_WEIGHT_FOR_NULL_TEXELS_DISABLE = 0x0, + RETURN_FILTER_WEIGHT_FOR_NULL_TEXELS_ENABLE = 0x1, + } RETURN_FILTER_WEIGHT_FOR_NULL_TEXELS; + typedef enum tagRETURN_FILTER_WEIGHT_FOR_BORDER_TEXELS { + RETURN_FILTER_WEIGHT_FOR_BORDER_TEXELS_DISABLE = 0x0, + RETURN_FILTER_WEIGHT_FOR_BORDER_TEXELS_ENABLE = 0x1, + } RETURN_FILTER_WEIGHT_FOR_BORDER_TEXELS; + /* init() assigns TEXTURE_COORDINATE_MODE_WRAP to all three Tc{x,y,z}AddressControlMode fields; all eight 3-bit encodings are named. */ typedef enum tagTEXTURE_COORDINATE_MODE { + TEXTURE_COORDINATE_MODE_WRAP = 0x0, + TEXTURE_COORDINATE_MODE_MIRROR = 0x1, + TEXTURE_COORDINATE_MODE_CLAMP = 0x2, + TEXTURE_COORDINATE_MODE_CUBE = 0x3, + TEXTURE_COORDINATE_MODE_CLAMP_BORDER = 0x4, + TEXTURE_COORDINATE_MODE_MIRROR_ONCE = 0x5, + TEXTURE_COORDINATE_MODE_HALF_BORDER = 0x6, + TEXTURE_COORDINATE_MODE_MIRROR_101 = 0x7, + } TEXTURE_COORDINATE_MODE; + /* Values for the two-bit TrilinearFilterQuality field; init() selects ..._FULL. */ typedef enum tagTRILINEAR_FILTER_QUALITY { + TRILINEAR_FILTER_QUALITY_FULL = 0x0, + TRILINEAR_FILTER_QUALITY_TRIQUAL_HIGHMAG_CLAMP_MIPFILTER = 0x1, + TRILINEAR_FILTER_QUALITY_MED = 0x2, + TRILINEAR_FILTER_QUALITY_LOW = 0x3, + } TRILINEAR_FILTER_QUALITY; + /* Values for the three-bit MaximumAnisotropy field; init() selects MAXIMUM_ANISOTROPY_RATIO_21. */ typedef enum tagMAXIMUM_ANISOTROPY { + MAXIMUM_ANISOTROPY_RATIO_21 = 0x0, +
MAXIMUM_ANISOTROPY_RATIO_41 = 0x1, + MAXIMUM_ANISOTROPY_RATIO_61 = 0x2, + MAXIMUM_ANISOTROPY_RATIO_81 = 0x3, + MAXIMUM_ANISOTROPY_RATIO_101 = 0x4, + MAXIMUM_ANISOTROPY_RATIO_121 = 0x5, + MAXIMUM_ANISOTROPY_RATIO_141 = 0x6, + MAXIMUM_ANISOTROPY_RATIO_161 = 0x7, + } MAXIMUM_ANISOTROPY; + /* Values for the two-bit ReductionType field; init() selects REDUCTION_TYPE_STD_FILTER. */ typedef enum tagREDUCTION_TYPE { + REDUCTION_TYPE_STD_FILTER = 0x0, + REDUCTION_TYPE_COMPARISON = 0x1, + REDUCTION_TYPE_MINIMUM = 0x2, + REDUCTION_TYPE_MAXIMUM = 0x3, + } REDUCTION_TYPE; + /* Two-state value for the LowQualityFilter field; init() selects ..._DISABLE. */ typedef enum tagLOW_QUALITY_FILTER { + LOW_QUALITY_FILTER_DISABLE = 0x0, + LOW_QUALITY_FILTER_ENABLE = 0x1, + } LOW_QUALITY_FILTER; + /* Resets the state: zero-fills the whole union with memset, then assigns every enumerated field its default explicitly (every default written here is the 0x0 enumerator). */ inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.LodAlgorithm = LOD_ALGORITHM_LEGACY; + TheStructure.Common.MinModeFilter = MIN_MODE_FILTER_NEAREST; + TheStructure.Common.MagModeFilter = MAG_MODE_FILTER_NEAREST; + TheStructure.Common.MipModeFilter = MIP_MODE_FILTER_NONE; + TheStructure.Common.CoarseLodQualityMode = COARSE_LOD_QUALITY_MODE_DISABLED; + TheStructure.Common.LodPreclampMode = LOD_PRECLAMP_MODE_NONE; + TheStructure.Common.TextureBorderColorMode = TEXTURE_BORDER_COLOR_MODE_OGL; + TheStructure.Common.CubeSurfaceControlMode = CUBE_SURFACE_CONTROL_MODE_PROGRAMMED; + TheStructure.Common.ShadowFunction = SHADOW_FUNCTION_PREFILTEROP_ALWAYS; + TheStructure.Common.ChromakeyMode = CHROMAKEY_MODE_KEYFILTER_KILL_ON_ANY_MATCH; + TheStructure.Common.LodClampMagnificationMode = LOD_CLAMP_MAGNIFICATION_MODE_MIPNONE; + TheStructure.Common.SrgbDecode = SRGB_DECODE_DECODE_EXT; + TheStructure.Common.ReturnFilterWeightForNullTexels = RETURN_FILTER_WEIGHT_FOR_NULL_TEXELS_DISABLE; + TheStructure.Common.ReturnFilterWeightForBorderTexels = RETURN_FILTER_WEIGHT_FOR_BORDER_TEXELS_DISABLE; + TheStructure.Common.TczAddressControlMode = TEXTURE_COORDINATE_MODE_WRAP; + TheStructure.Common.TcyAddressControlMode = TEXTURE_COORDINATE_MODE_WRAP; + TheStructure.Common.TcxAddressControlMode = TEXTURE_COORDINATE_MODE_WRAP; +
TheStructure.Common.TrilinearFilterQuality = TRILINEAR_FILTER_QUALITY_FULL; + TheStructure.Common.MaximumAnisotropy = MAXIMUM_ANISOTROPY_RATIO_21; + TheStructure.Common.ReductionType = REDUCTION_TYPE_STD_FILTER; + TheStructure.Common.LowQualityFilter = LOW_QUALITY_FILTER_DISABLE; + } + /* Returns, by value, a SAMPLER_STATE on which init() has been called. */ static tagSAMPLER_STATE sInit(void) { + SAMPLER_STATE state; + state.init(); + return state; + } + /* Mutable access to dword `index` of RawData[4]; DEBUG_BREAK_IF flags index >= 4. */ inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 4); + return TheStructure.RawData[index]; + } + /* Enum-typed accessors: the setter stores the enumerator in the bitfield, the getter casts the raw bits back to the enum type. */ inline void setLodAlgorithm(const LOD_ALGORITHM value) { + TheStructure.Common.LodAlgorithm = value; + } + inline LOD_ALGORITHM getLodAlgorithm(void) const { + return static_cast<LOD_ALGORITHM>(TheStructure.Common.LodAlgorithm); + } + /* TextureLodBias is declared as BITFIELD_RANGE(1, 13), i.e. 13 bits, and is stored unshifted, so the largest value that survives the assignment is 0x1fff (the previous limit 0x3ffe was the field's in-place mask). */ inline void setTextureLodBias(const uint32_t value) { + DEBUG_BREAK_IF(value > 0x1fff); + TheStructure.Common.TextureLodBias = value; + } + inline uint32_t getTextureLodBias(void) const { + return TheStructure.Common.TextureLodBias; + } + inline void setMinModeFilter(const MIN_MODE_FILTER value) { + TheStructure.Common.MinModeFilter = value; + } + inline MIN_MODE_FILTER getMinModeFilter(void) const { + return static_cast<MIN_MODE_FILTER>(TheStructure.Common.MinModeFilter); + } + inline void setMagModeFilter(const MAG_MODE_FILTER value) { + TheStructure.Common.MagModeFilter = value; + } + inline MAG_MODE_FILTER getMagModeFilter(void) const { + return static_cast<MAG_MODE_FILTER>(TheStructure.Common.MagModeFilter); + } + inline void setMipModeFilter(const MIP_MODE_FILTER value) { + TheStructure.Common.MipModeFilter = value; + } + inline MIP_MODE_FILTER getMipModeFilter(void) const { + return static_cast<MIP_MODE_FILTER>(TheStructure.Common.MipModeFilter); + } + inline void setCoarseLodQualityMode(const COARSE_LOD_QUALITY_MODE value) { + TheStructure.Common.CoarseLodQualityMode = value; + } + inline COARSE_LOD_QUALITY_MODE getCoarseLodQualityMode(void) const { + return static_cast<COARSE_LOD_QUALITY_MODE>(TheStructure.Common.CoarseLodQualityMode); + } + inline void setLodPreclampMode(const LOD_PRECLAMP_MODE value) { +
TheStructure.Common.LodPreclampMode = value; + } + inline LOD_PRECLAMP_MODE getLodPreclampMode(void) const { + return static_cast(TheStructure.Common.LodPreclampMode); + } + inline void setTextureBorderColorMode(const TEXTURE_BORDER_COLOR_MODE value) { + TheStructure.Common.TextureBorderColorMode = value; + } + inline TEXTURE_BORDER_COLOR_MODE getTextureBorderColorMode(void) const { + return static_cast(TheStructure.Common.TextureBorderColorMode); + } + inline void setCpsLodCompensationEnable(const bool value) { + TheStructure.Common.CpsLodCompensationEnable = value; + } + inline bool getCpsLodCompensationEnable(void) const { + return TheStructure.Common.CpsLodCompensationEnable; + } + inline void setSamplerDisable(const bool value) { + TheStructure.Common.SamplerDisable = value; + } + inline bool getSamplerDisable(void) const { + return TheStructure.Common.SamplerDisable; + } + inline void setCubeSurfaceControlMode(const CUBE_SURFACE_CONTROL_MODE value) { + TheStructure.Common.CubeSurfaceControlMode = value; + } + inline CUBE_SURFACE_CONTROL_MODE getCubeSurfaceControlMode(void) const { + return static_cast(TheStructure.Common.CubeSurfaceControlMode); + } + inline void setShadowFunction(const SHADOW_FUNCTION value) { + TheStructure.Common.ShadowFunction = value; + } + inline SHADOW_FUNCTION getShadowFunction(void) const { + return static_cast(TheStructure.Common.ShadowFunction); + } + inline void setChromakeyMode(const CHROMAKEY_MODE value) { + TheStructure.Common.ChromakeyMode = value; + } + inline CHROMAKEY_MODE getChromakeyMode(void) const { + return static_cast(TheStructure.Common.ChromakeyMode); + } + inline void setChromakeyIndex(const uint32_t value) { + DEBUG_BREAK_IF(value > 0x60); + TheStructure.Common.ChromakeyIndex = value; + } + inline uint32_t getChromakeyIndex(void) const { + return TheStructure.Common.ChromakeyIndex; + } + inline void setChromakeyEnable(const bool value) { + TheStructure.Common.ChromakeyEnable = value; + } + inline bool 
getChromakeyEnable(void) const { + return TheStructure.Common.ChromakeyEnable; + } + inline void setMaxLod(const uint32_t value) { + DEBUG_BREAK_IF(value > 0xfff00); + TheStructure.Common.MaxLod = value; + } + inline uint32_t getMaxLod(void) const { + return TheStructure.Common.MaxLod; + } + inline void setMinLod(const uint32_t value) { + DEBUG_BREAK_IF(value > 0xfff00000L); + TheStructure.Common.MinLod = value; + } + inline uint32_t getMinLod(void) const { + return TheStructure.Common.MinLod; + } + inline void setLodClampMagnificationMode(const LOD_CLAMP_MAGNIFICATION_MODE value) { + TheStructure.Common.LodClampMagnificationMode = value; + } + inline LOD_CLAMP_MAGNIFICATION_MODE getLodClampMagnificationMode(void) const { + return static_cast(TheStructure.Common.LodClampMagnificationMode); + } + inline void setSrgbDecode(const SRGB_DECODE value) { + TheStructure.Common.SrgbDecode = value; + } + inline SRGB_DECODE getSrgbDecode(void) const { + return static_cast(TheStructure.Common.SrgbDecode); + } + inline void setReturnFilterWeightForNullTexels(const RETURN_FILTER_WEIGHT_FOR_NULL_TEXELS value) { + TheStructure.Common.ReturnFilterWeightForNullTexels = value; + } + inline RETURN_FILTER_WEIGHT_FOR_NULL_TEXELS getReturnFilterWeightForNullTexels(void) const { + return static_cast(TheStructure.Common.ReturnFilterWeightForNullTexels); + } + inline void setReturnFilterWeightForBorderTexels(const RETURN_FILTER_WEIGHT_FOR_BORDER_TEXELS value) { + TheStructure.Common.ReturnFilterWeightForBorderTexels = value; + } + inline RETURN_FILTER_WEIGHT_FOR_BORDER_TEXELS getReturnFilterWeightForBorderTexels(void) const { + return static_cast(TheStructure.Common.ReturnFilterWeightForBorderTexels); + } + typedef enum tagINDIRECTSTATEPOINTER { + INDIRECTSTATEPOINTER_BIT_SHIFT = 0x6, + INDIRECTSTATEPOINTER_ALIGN_SIZE = 0x40, + } INDIRECTSTATEPOINTER; + inline void setIndirectStatePointer(const uint32_t value) { + DEBUG_BREAK_IF(value > 0xffffc0); + TheStructure.Common.IndirectStatePointer 
= static_cast(value) >> INDIRECTSTATEPOINTER_BIT_SHIFT; + } + inline uint32_t getIndirectStatePointer(void) const { + return TheStructure.Common.IndirectStatePointer << INDIRECTSTATEPOINTER_BIT_SHIFT; + } + inline void setTczAddressControlMode(const TEXTURE_COORDINATE_MODE value) { + TheStructure.Common.TczAddressControlMode = value; + } + inline TEXTURE_COORDINATE_MODE getTczAddressControlMode(void) const { + return static_cast(TheStructure.Common.TczAddressControlMode); + } + inline void setTcyAddressControlMode(const TEXTURE_COORDINATE_MODE value) { + TheStructure.Common.TcyAddressControlMode = value; + } + inline TEXTURE_COORDINATE_MODE getTcyAddressControlMode(void) const { + return static_cast(TheStructure.Common.TcyAddressControlMode); + } + inline void setTcxAddressControlMode(const TEXTURE_COORDINATE_MODE value) { + TheStructure.Common.TcxAddressControlMode = value; + } + inline TEXTURE_COORDINATE_MODE getTcxAddressControlMode(void) const { + return static_cast(TheStructure.Common.TcxAddressControlMode); + } + inline void setReductionTypeEnable(const bool value) { + TheStructure.Common.ReductionTypeEnable = value; + } + inline bool getReductionTypeEnable(void) const { + return TheStructure.Common.ReductionTypeEnable; + } + inline void setNonNormalizedCoordinateEnable(const bool value) { + TheStructure.Common.NonNormalizedCoordinateEnable = value; + } + inline bool getNonNormalizedCoordinateEnable(void) const { + return TheStructure.Common.NonNormalizedCoordinateEnable; + } + inline void setTrilinearFilterQuality(const TRILINEAR_FILTER_QUALITY value) { + TheStructure.Common.TrilinearFilterQuality = value; + } + inline TRILINEAR_FILTER_QUALITY getTrilinearFilterQuality(void) const { + return static_cast(TheStructure.Common.TrilinearFilterQuality); + } + inline void setRAddressMinFilterRoundingEnable(const bool value) { + TheStructure.Common.RAddressMinFilterRoundingEnable = value; + } + inline bool getRAddressMinFilterRoundingEnable(void) const { + return 
TheStructure.Common.RAddressMinFilterRoundingEnable; + } + inline void setRAddressMagFilterRoundingEnable(const bool value) { + TheStructure.Common.RAddressMagFilterRoundingEnable = value; + } + inline bool getRAddressMagFilterRoundingEnable(void) const { + return TheStructure.Common.RAddressMagFilterRoundingEnable; + } + inline void setVAddressMinFilterRoundingEnable(const bool value) { + TheStructure.Common.VAddressMinFilterRoundingEnable = value; + } + inline bool getVAddressMinFilterRoundingEnable(void) const { + return TheStructure.Common.VAddressMinFilterRoundingEnable; + } + inline void setVAddressMagFilterRoundingEnable(const bool value) { + TheStructure.Common.VAddressMagFilterRoundingEnable = value; + } + inline bool getVAddressMagFilterRoundingEnable(void) const { + return TheStructure.Common.VAddressMagFilterRoundingEnable; + } + inline void setUAddressMinFilterRoundingEnable(const bool value) { + TheStructure.Common.UAddressMinFilterRoundingEnable = value; + } + inline bool getUAddressMinFilterRoundingEnable(void) const { + return TheStructure.Common.UAddressMinFilterRoundingEnable; + } + inline void setUAddressMagFilterRoundingEnable(const bool value) { + TheStructure.Common.UAddressMagFilterRoundingEnable = value; + } + inline bool getUAddressMagFilterRoundingEnable(void) const { + return TheStructure.Common.UAddressMagFilterRoundingEnable; + } + inline void setMaximumAnisotropy(const MAXIMUM_ANISOTROPY value) { + TheStructure.Common.MaximumAnisotropy = value; + } + inline MAXIMUM_ANISOTROPY getMaximumAnisotropy(void) const { + return static_cast(TheStructure.Common.MaximumAnisotropy); + } + inline void setReductionType(const REDUCTION_TYPE value) { + TheStructure.Common.ReductionType = value; + } + inline REDUCTION_TYPE getReductionType(void) const { + return static_cast(TheStructure.Common.ReductionType); + } + inline void setLowQualityFilter(const LOW_QUALITY_FILTER value) { + TheStructure.Common.LowQualityFilter = value; + } + inline 
LOW_QUALITY_FILTER getLowQualityFilter(void) const { + return static_cast(TheStructure.Common.LowQualityFilter); + } + inline void setAllowLowQualityLodCalculation(const bool value) { + TheStructure.Common.AllowLowQualityLodCalculation = value; + } + inline bool getAllowLowQualityLodCalculation(void) const { + return TheStructure.Common.AllowLowQualityLodCalculation; + } +} SAMPLER_STATE; +STATIC_ASSERT(16 == sizeof(SAMPLER_STATE)); + +typedef struct tagSTATE_BASE_ADDRESS { + union tagTheStructure { + struct tagCommon { + // DWORD 0 + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t Reserved_8 : BITFIELD_RANGE(8, 15); + uint32_t _3DCommandSubOpcode : BITFIELD_RANGE(16, 23); + uint32_t _3DCommandOpcode : BITFIELD_RANGE(24, 26); + uint32_t CommandSubtype : BITFIELD_RANGE(27, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + // DWORD 1-2 + uint64_t GeneralStateBaseAddressModifyEnable : BITFIELD_RANGE(0, 0); + uint64_t Reserved_33 : BITFIELD_RANGE(1, 3); + uint64_t GeneralStateMemoryObjectControlState_Reserved : BITFIELD_RANGE(4, 4); + uint64_t GeneralStateMemoryObjectControlState_IndexToMocsTables : BITFIELD_RANGE(5, 10); + uint64_t Reserved_43 : BITFIELD_RANGE(11, 11); + uint64_t GeneralStateBaseAddress : BITFIELD_RANGE(12, 63); + // DWORD 3 + uint32_t Reserved_96 : BITFIELD_RANGE(0, 12); + uint32_t EnableMemoryCompressionForAllStatelessAccesses : BITFIELD_RANGE(13, 13); + uint32_t DisableSupportForMultiGpuAtomicsForStatelessAccesses : BITFIELD_RANGE(14, 14); + uint32_t DisableSupportForMultiGpuPartialWritesForStatelessMessages : BITFIELD_RANGE(15, 15); + uint32_t StatelessDataPortAccessMemoryObjectControlState_Reserved : BITFIELD_RANGE(16, 16); + uint32_t StatelessDataPortAccessMemoryObjectControlState_IndexToMocsTables : BITFIELD_RANGE(17, 22); + uint32_t L1CachePolicyL1CacheControl : BITFIELD_RANGE(23, 25); + uint32_t Reserved_119 : BITFIELD_RANGE(26, 31); + // DWORD 4-5 + uint64_t SurfaceStateBaseAddressModifyEnable : BITFIELD_RANGE(0, 0); + uint64_t 
Reserved_129 : BITFIELD_RANGE(1, 3); + uint64_t SurfaceStateMemoryObjectControlState_Reserved : BITFIELD_RANGE(4, 4); + uint64_t SurfaceStateMemoryObjectControlState_IndexToMocsTables : BITFIELD_RANGE(5, 10); + uint64_t Reserved_139 : BITFIELD_RANGE(11, 11); + uint64_t SurfaceStateBaseAddress : BITFIELD_RANGE(12, 63); + // DWORD 6-7 + uint64_t DynamicStateBaseAddressModifyEnable : BITFIELD_RANGE(0, 0); + uint64_t Reserved_193 : BITFIELD_RANGE(1, 3); + uint64_t DynamicStateMemoryObjectControlState_Reserved : BITFIELD_RANGE(4, 4); + uint64_t DynamicStateMemoryObjectControlState_IndexToMocsTables : BITFIELD_RANGE(5, 10); + uint64_t Reserved_203 : BITFIELD_RANGE(11, 11); + uint64_t DynamicStateBaseAddress : BITFIELD_RANGE(12, 63); + // DWORD 8-9 + uint64_t IndirectObjectBaseAddressModifyEnable : BITFIELD_RANGE(0, 0); + uint64_t Reserved_257 : BITFIELD_RANGE(1, 3); + uint64_t IndirectObjectMemoryObjectControlState_Reserved : BITFIELD_RANGE(4, 4); + uint64_t IndirectObjectMemoryObjectControlState_IndexToMocsTables : BITFIELD_RANGE(5, 10); + uint64_t Reserved_267 : BITFIELD_RANGE(11, 11); + uint64_t IndirectObjectBaseAddress : BITFIELD_RANGE(12, 63); + // DWORD 10-11 + uint64_t InstructionBaseAddressModifyEnable : BITFIELD_RANGE(0, 0); + uint64_t Reserved_321 : BITFIELD_RANGE(1, 3); + uint64_t InstructionMemoryObjectControlState_Reserved : BITFIELD_RANGE(4, 4); + uint64_t InstructionMemoryObjectControlState_IndexToMocsTables : BITFIELD_RANGE(5, 10); + uint64_t Reserved_331 : BITFIELD_RANGE(11, 11); + uint64_t InstructionBaseAddress : BITFIELD_RANGE(12, 63); + // DWORD 12 + uint32_t GeneralStateBufferSizeModifyEnable : BITFIELD_RANGE(0, 0); + uint32_t Reserved_385 : BITFIELD_RANGE(1, 11); + uint32_t GeneralStateBufferSize : BITFIELD_RANGE(12, 31); + // DWORD 13 + uint32_t DynamicStateBufferSizeModifyEnable : BITFIELD_RANGE(0, 0); + uint32_t Reserved_417 : BITFIELD_RANGE(1, 11); + uint32_t DynamicStateBufferSize : BITFIELD_RANGE(12, 31); + // DWORD 14 + uint32_t 
IndirectObjectBufferSizeModifyEnable : BITFIELD_RANGE(0, 0); + uint32_t Reserved_449 : BITFIELD_RANGE(1, 11); + uint32_t IndirectObjectBufferSize : BITFIELD_RANGE(12, 31); + // DWORD 15 + uint32_t InstructionBufferSizeModifyEnable : BITFIELD_RANGE(0, 0); + uint32_t Reserved_481 : BITFIELD_RANGE(1, 11); + uint32_t InstructionBufferSize : BITFIELD_RANGE(12, 31); + // DWORD 16-17 + uint64_t BindlessSurfaceStateBaseAddressModifyEnable : BITFIELD_RANGE(0, 0); + uint64_t Reserved_513 : BITFIELD_RANGE(1, 3); + uint64_t BindlessSurfaceStateMemoryObjectControlState_Reserved : BITFIELD_RANGE(4, 4); + uint64_t BindlessSurfaceStateMemoryObjectControlState_IndexToMocsTables : BITFIELD_RANGE(5, 10); + uint64_t Reserved_523 : BITFIELD_RANGE(11, 11); + uint64_t BindlessSurfaceStateBaseAddress : BITFIELD_RANGE(12, 63); + // DWORD 18 + uint32_t BindlessSurfaceStateSize; + // DWORD 19-20 + uint64_t BindlessSamplerStateBaseAddressModifyEnable : BITFIELD_RANGE(0, 0); + uint64_t Reserved_609 : BITFIELD_RANGE(1, 3); + uint64_t BindlessSamplerStateMemoryObjectControlState_Reserved : BITFIELD_RANGE(4, 4); + uint64_t BindlessSamplerStateMemoryObjectControlState_IndexToMocsTables : BITFIELD_RANGE(5, 10); + uint64_t Reserved_619 : BITFIELD_RANGE(11, 11); + uint64_t BindlessSamplerStateBaseAddress : BITFIELD_RANGE(12, 63); + // DWORD 21 + uint32_t Reserved_672 : BITFIELD_RANGE(0, 11); + uint32_t BindlessSamplerStateBufferSize : BITFIELD_RANGE(12, 31); + } Common; + uint32_t RawData[22]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_DWORD_COUNT_N = 0x14, + } DWORD_LENGTH; + typedef enum tag_3D_COMMAND_SUB_OPCODE { + _3D_COMMAND_SUB_OPCODE_STATE_BASE_ADDRESS = 0x1, + } _3D_COMMAND_SUB_OPCODE; + typedef enum tag_3D_COMMAND_OPCODE { + _3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED = 0x1, + } _3D_COMMAND_OPCODE; + typedef enum tagCOMMAND_SUBTYPE { + COMMAND_SUBTYPE_GFXPIPE_COMMON = 0x0, + } COMMAND_SUBTYPE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_GFXPIPE = 0x3, + } 
COMMAND_TYPE; + typedef enum tagENABLE_MEMORY_COMPRESSION_FOR_ALL_STATELESS_ACCESSES { + ENABLE_MEMORY_COMPRESSION_FOR_ALL_STATELESS_ACCESSES_DISABLED = 0x0, + ENABLE_MEMORY_COMPRESSION_FOR_ALL_STATELESS_ACCESSES_ENABLED = 0x1, + } ENABLE_MEMORY_COMPRESSION_FOR_ALL_STATELESS_ACCESSES; + typedef enum tagL1_CACHE_POLICY { + L1_CACHE_POLICY_WBP = 0x0, + L1_CACHE_POLICY_UC = 0x1, + L1_CACHE_POLICY_WB = 0x2, + L1_CACHE_POLICY_WT = 0x3, + L1_CACHE_POLICY_WS = 0x4, + } L1_CACHE_POLICY; + typedef enum tagPATCH_CONSTANTS { + GENERALSTATEBASEADDRESS_BYTEOFFSET = 0x4, + GENERALSTATEBASEADDRESS_INDEX = 0x1, + SURFACESTATEBASEADDRESS_BYTEOFFSET = 0x10, + SURFACESTATEBASEADDRESS_INDEX = 0x4, + DYNAMICSTATEBASEADDRESS_BYTEOFFSET = 0x18, + DYNAMICSTATEBASEADDRESS_INDEX = 0x6, + INDIRECTOBJECTBASEADDRESS_BYTEOFFSET = 0x20, + INDIRECTOBJECTBASEADDRESS_INDEX = 0x8, + INSTRUCTIONBASEADDRESS_BYTEOFFSET = 0x28, + INSTRUCTIONBASEADDRESS_INDEX = 0xa, + BINDLESSSURFACESTATEBASEADDRESS_BYTEOFFSET = 0x40, + BINDLESSSURFACESTATEBASEADDRESS_INDEX = 0x10, + BINDLESSSAMPLERSTATEBASEADDRESS_BYTEOFFSET = 0x4c, + BINDLESSSAMPLERSTATEBASEADDRESS_INDEX = 0x13, + } PATCH_CONSTANTS; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_DWORD_COUNT_N; + TheStructure.Common._3DCommandSubOpcode = _3D_COMMAND_SUB_OPCODE_STATE_BASE_ADDRESS; + TheStructure.Common._3DCommandOpcode = _3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED; + TheStructure.Common.CommandSubtype = COMMAND_SUBTYPE_GFXPIPE_COMMON; + TheStructure.Common.CommandType = COMMAND_TYPE_GFXPIPE; + TheStructure.Common.EnableMemoryCompressionForAllStatelessAccesses = ENABLE_MEMORY_COMPRESSION_FOR_ALL_STATELESS_ACCESSES_DISABLED; + TheStructure.Common.DisableSupportForMultiGpuAtomicsForStatelessAccesses = 1; + TheStructure.Common.DisableSupportForMultiGpuPartialWritesForStatelessMessages = 1; + TheStructure.Common.L1CachePolicyL1CacheControl = L1_CACHE_POLICY_WBP; + } + static 
tagSTATE_BASE_ADDRESS sInit(void) { + STATE_BASE_ADDRESS state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 22); + return TheStructure.RawData[index]; + } + inline void setGeneralStateBaseAddressModifyEnable(const bool value) { + TheStructure.Common.GeneralStateBaseAddressModifyEnable = value; + } + inline bool getGeneralStateBaseAddressModifyEnable(void) const { + return (TheStructure.Common.GeneralStateBaseAddressModifyEnable); + } + inline void setGeneralStateMemoryObjectControlStateReserved(const uint64_t value) { + TheStructure.Common.GeneralStateMemoryObjectControlState_Reserved = value; + } + inline uint64_t getGeneralStateMemoryObjectControlStateReserved(void) const { + return (TheStructure.Common.GeneralStateMemoryObjectControlState_Reserved); + } + inline void setGeneralStateMemoryObjectControlStateIndexToMocsTables(const uint64_t value) { + TheStructure.Common.GeneralStateMemoryObjectControlState_IndexToMocsTables = value >> 1; + } + inline uint64_t getGeneralStateMemoryObjectControlStateIndexToMocsTables(void) const { + return (TheStructure.Common.GeneralStateMemoryObjectControlState_IndexToMocsTables << 1); + } + typedef enum tagGENERALSTATEBASEADDRESS { + GENERALSTATEBASEADDRESS_BIT_SHIFT = 0xc, + GENERALSTATEBASEADDRESS_ALIGN_SIZE = 0x1000, + } GENERALSTATEBASEADDRESS; + inline void setGeneralStateBaseAddress(const uint64_t value) { + TheStructure.Common.GeneralStateBaseAddress = value >> GENERALSTATEBASEADDRESS_BIT_SHIFT; + } + inline uint64_t getGeneralStateBaseAddress(void) const { + return (TheStructure.Common.GeneralStateBaseAddress << GENERALSTATEBASEADDRESS_BIT_SHIFT); + } + inline void setEnableMemoryCompressionForAllStatelessAccesses(const ENABLE_MEMORY_COMPRESSION_FOR_ALL_STATELESS_ACCESSES value) { + TheStructure.Common.EnableMemoryCompressionForAllStatelessAccesses = value; + } + inline ENABLE_MEMORY_COMPRESSION_FOR_ALL_STATELESS_ACCESSES 
getEnableMemoryCompressionForAllStatelessAccesses(void) const { + return static_cast(TheStructure.Common.EnableMemoryCompressionForAllStatelessAccesses); + } + inline void setDisableSupportForMultiGpuAtomicsForStatelessAccesses(const bool value) { + TheStructure.Common.DisableSupportForMultiGpuAtomicsForStatelessAccesses = value; + } + inline bool getDisableSupportForMultiGpuAtomicsForStatelessAccesses(void) const { + return (TheStructure.Common.DisableSupportForMultiGpuAtomicsForStatelessAccesses); + } + inline void setDisableSupportForMultiGpuPartialWritesForStatelessMessages(const bool value) { + TheStructure.Common.DisableSupportForMultiGpuPartialWritesForStatelessMessages = value; + } + inline bool getDisableSupportForMultiGpuPartialWritesForStatelessMessages(void) const { + return (TheStructure.Common.DisableSupportForMultiGpuPartialWritesForStatelessMessages); + } + inline void setStatelessDataPortAccessMemoryObjectControlStateReserved(const uint32_t value) { + TheStructure.Common.StatelessDataPortAccessMemoryObjectControlState_Reserved = value; + } + inline uint32_t getStatelessDataPortAccessMemoryObjectControlStateReserved(void) const { + return (TheStructure.Common.StatelessDataPortAccessMemoryObjectControlState_Reserved); + } + inline void setStatelessDataPortAccessMemoryObjectControlStateIndexToMocsTables(const uint32_t value) { + TheStructure.Common.StatelessDataPortAccessMemoryObjectControlState_IndexToMocsTables = value >> 1; + } + inline uint32_t getStatelessDataPortAccessMemoryObjectControlStateIndexToMocsTables(void) const { + return (TheStructure.Common.StatelessDataPortAccessMemoryObjectControlState_IndexToMocsTables << 1); + } + inline void setL1CachePolicyL1CacheControl(const L1_CACHE_POLICY value) { + TheStructure.Common.L1CachePolicyL1CacheControl = value; + } + inline L1_CACHE_POLICY getL1CachePolicyL1CacheControl(void) const { + return static_cast(TheStructure.Common.L1CachePolicyL1CacheControl); + } + inline void 
setStatelessDataPortAccessMemoryObjectControlState(const uint32_t value) { + TheStructure.Common.StatelessDataPortAccessMemoryObjectControlState_Reserved = value; + TheStructure.Common.StatelessDataPortAccessMemoryObjectControlState_IndexToMocsTables = (value >> 1); + } + inline uint32_t getStatelessDataPortAccessMemoryObjectControlState(void) const { + uint32_t mocs = TheStructure.Common.StatelessDataPortAccessMemoryObjectControlState_Reserved; + mocs |= (TheStructure.Common.StatelessDataPortAccessMemoryObjectControlState_IndexToMocsTables << 1); + return (mocs); + } + inline void setSurfaceStateBaseAddressModifyEnable(const bool value) { + TheStructure.Common.SurfaceStateBaseAddressModifyEnable = value; + } + inline bool getSurfaceStateBaseAddressModifyEnable(void) const { + return (TheStructure.Common.SurfaceStateBaseAddressModifyEnable); + } + inline void setSurfaceStateMemoryObjectControlStateReserved(const uint64_t value) { + TheStructure.Common.SurfaceStateMemoryObjectControlState_Reserved = value; + } + inline uint64_t getSurfaceStateMemoryObjectControlStateReserved(void) const { + return (TheStructure.Common.SurfaceStateMemoryObjectControlState_Reserved); + } + inline void setSurfaceStateMemoryObjectControlStateIndexToMocsTables(const uint64_t value) { + TheStructure.Common.SurfaceStateMemoryObjectControlState_IndexToMocsTables = value >> 1; + } + inline uint64_t getSurfaceStateMemoryObjectControlStateIndexToMocsTables(void) const { + return (TheStructure.Common.SurfaceStateMemoryObjectControlState_IndexToMocsTables << 1); + } + typedef enum tagSURFACESTATEBASEADDRESS { + SURFACESTATEBASEADDRESS_BIT_SHIFT = 0xc, + SURFACESTATEBASEADDRESS_ALIGN_SIZE = 0x1000, + } SURFACESTATEBASEADDRESS; + inline void setSurfaceStateBaseAddress(const uint64_t value) { + TheStructure.Common.SurfaceStateBaseAddress = value >> SURFACESTATEBASEADDRESS_BIT_SHIFT; + } + inline uint64_t getSurfaceStateBaseAddress(void) const { + return (TheStructure.Common.SurfaceStateBaseAddress 
<< SURFACESTATEBASEADDRESS_BIT_SHIFT); + } + inline void setDynamicStateBaseAddressModifyEnable(const bool value) { + TheStructure.Common.DynamicStateBaseAddressModifyEnable = value; + } + inline bool getDynamicStateBaseAddressModifyEnable(void) const { + return (TheStructure.Common.DynamicStateBaseAddressModifyEnable); + } + inline void setDynamicStateMemoryObjectControlStateReserved(const uint64_t value) { + TheStructure.Common.DynamicStateMemoryObjectControlState_Reserved = value; + } + inline uint64_t getDynamicStateMemoryObjectControlStateReserved(void) const { + return (TheStructure.Common.DynamicStateMemoryObjectControlState_Reserved); + } + inline void setDynamicStateMemoryObjectControlStateIndexToMocsTables(const uint64_t value) { + TheStructure.Common.DynamicStateMemoryObjectControlState_IndexToMocsTables = value >> 1; + } + inline uint64_t getDynamicStateMemoryObjectControlStateIndexToMocsTables(void) const { + return (TheStructure.Common.DynamicStateMemoryObjectControlState_IndexToMocsTables << 1); + } + typedef enum tagDYNAMICSTATEBASEADDRESS { + DYNAMICSTATEBASEADDRESS_BIT_SHIFT = 0xc, + DYNAMICSTATEBASEADDRESS_ALIGN_SIZE = 0x1000, + } DYNAMICSTATEBASEADDRESS; + inline void setDynamicStateBaseAddress(const uint64_t value) { + TheStructure.Common.DynamicStateBaseAddress = value >> DYNAMICSTATEBASEADDRESS_BIT_SHIFT; + } + inline uint64_t getDynamicStateBaseAddress(void) const { + return (TheStructure.Common.DynamicStateBaseAddress << DYNAMICSTATEBASEADDRESS_BIT_SHIFT); + } + inline void setIndirectObjectBaseAddressModifyEnable(const bool value) { + TheStructure.Common.IndirectObjectBaseAddressModifyEnable = value; + } + inline bool getIndirectObjectBaseAddressModifyEnable(void) const { + return (TheStructure.Common.IndirectObjectBaseAddressModifyEnable); + } + inline void setIndirectObjectMemoryObjectControlStateReserved(const uint64_t value) { + TheStructure.Common.IndirectObjectMemoryObjectControlState_Reserved = value; + } + inline uint64_t 
getIndirectObjectMemoryObjectControlStateReserved(void) const { + return (TheStructure.Common.IndirectObjectMemoryObjectControlState_Reserved); + } + inline void setIndirectObjectMemoryObjectControlStateIndexToMocsTables(const uint64_t value) { + TheStructure.Common.IndirectObjectMemoryObjectControlState_IndexToMocsTables = value >> 1; + } + inline uint64_t getIndirectObjectMemoryObjectControlStateIndexToMocsTables(void) const { + return (TheStructure.Common.IndirectObjectMemoryObjectControlState_IndexToMocsTables << 1); + } + typedef enum tagINDIRECTOBJECTBASEADDRESS { + INDIRECTOBJECTBASEADDRESS_BIT_SHIFT = 0xc, + INDIRECTOBJECTBASEADDRESS_ALIGN_SIZE = 0x1000, + } INDIRECTOBJECTBASEADDRESS; + inline void setIndirectObjectBaseAddress(const uint64_t value) { + TheStructure.Common.IndirectObjectBaseAddress = value >> INDIRECTOBJECTBASEADDRESS_BIT_SHIFT; + } + inline uint64_t getIndirectObjectBaseAddress(void) const { + return (TheStructure.Common.IndirectObjectBaseAddress << INDIRECTOBJECTBASEADDRESS_BIT_SHIFT); + } + inline void setInstructionBaseAddressModifyEnable(const bool value) { + TheStructure.Common.InstructionBaseAddressModifyEnable = value; + } + inline bool getInstructionBaseAddressModifyEnable(void) const { + return (TheStructure.Common.InstructionBaseAddressModifyEnable); + } + inline void setInstructionMemoryObjectControlStateReserved(const uint64_t value) { + TheStructure.Common.InstructionMemoryObjectControlState_Reserved = value; + } + inline uint64_t getInstructionMemoryObjectControlStateReserved(void) const { + return (TheStructure.Common.InstructionMemoryObjectControlState_Reserved); + } + inline void setInstructionMemoryObjectControlStateIndexToMocsTables(const uint64_t value) { + TheStructure.Common.InstructionMemoryObjectControlState_IndexToMocsTables = value >> 1; + } + inline uint64_t getInstructionMemoryObjectControlStateIndexToMocsTables(void) const { + return (TheStructure.Common.InstructionMemoryObjectControlState_IndexToMocsTables << 
1); + } + inline void setInstructionMemoryObjectControlState(const uint32_t value) { + uint64_t val = static_cast(value); + TheStructure.Common.InstructionMemoryObjectControlState_Reserved = val; + TheStructure.Common.InstructionMemoryObjectControlState_IndexToMocsTables = (val >> 1); + } + inline uint32_t getInstructionMemoryObjectControlState(void) const { + uint64_t mocs = TheStructure.Common.InstructionMemoryObjectControlState_Reserved; + mocs |= (TheStructure.Common.InstructionMemoryObjectControlState_IndexToMocsTables << 1); + return static_cast(mocs); + } + typedef enum tagINSTRUCTIONBASEADDRESS { + INSTRUCTIONBASEADDRESS_BIT_SHIFT = 0xc, + INSTRUCTIONBASEADDRESS_ALIGN_SIZE = 0x1000, + } INSTRUCTIONBASEADDRESS; + inline void setInstructionBaseAddress(const uint64_t value) { + TheStructure.Common.InstructionBaseAddress = value >> INSTRUCTIONBASEADDRESS_BIT_SHIFT; + } + inline uint64_t getInstructionBaseAddress(void) const { + return (TheStructure.Common.InstructionBaseAddress << INSTRUCTIONBASEADDRESS_BIT_SHIFT); + } + inline void setGeneralStateBufferSizeModifyEnable(const bool value) { + TheStructure.Common.GeneralStateBufferSizeModifyEnable = value; + } + inline bool getGeneralStateBufferSizeModifyEnable(void) const { + return (TheStructure.Common.GeneralStateBufferSizeModifyEnable); + } + inline void setGeneralStateBufferSize(const uint32_t value) { + TheStructure.Common.GeneralStateBufferSize = value; + } + inline uint32_t getGeneralStateBufferSize(void) const { + return (TheStructure.Common.GeneralStateBufferSize); + } + inline void setDynamicStateBufferSizeModifyEnable(const bool value) { + TheStructure.Common.DynamicStateBufferSizeModifyEnable = value; + } + inline bool getDynamicStateBufferSizeModifyEnable(void) const { + return (TheStructure.Common.DynamicStateBufferSizeModifyEnable); + } + inline void setDynamicStateBufferSize(const uint32_t value) { + TheStructure.Common.DynamicStateBufferSize = value; + } + inline uint32_t 
getDynamicStateBufferSize(void) const { + return (TheStructure.Common.DynamicStateBufferSize); + } + inline void setIndirectObjectBufferSizeModifyEnable(const bool value) { + TheStructure.Common.IndirectObjectBufferSizeModifyEnable = value; + } + inline bool getIndirectObjectBufferSizeModifyEnable(void) const { + return (TheStructure.Common.IndirectObjectBufferSizeModifyEnable); + } + inline void setIndirectObjectBufferSize(const uint32_t value) { + TheStructure.Common.IndirectObjectBufferSize = value; + } + inline uint32_t getIndirectObjectBufferSize(void) const { + return (TheStructure.Common.IndirectObjectBufferSize); + } + inline void setInstructionBufferSizeModifyEnable(const bool value) { + TheStructure.Common.InstructionBufferSizeModifyEnable = value; + } + inline bool getInstructionBufferSizeModifyEnable(void) const { + return (TheStructure.Common.InstructionBufferSizeModifyEnable); + } + inline void setInstructionBufferSize(const uint32_t value) { + TheStructure.Common.InstructionBufferSize = value; + } + inline uint32_t getInstructionBufferSize(void) const { + return (TheStructure.Common.InstructionBufferSize); + } + inline void setBindlessSurfaceStateBaseAddressModifyEnable(const bool value) { + TheStructure.Common.BindlessSurfaceStateBaseAddressModifyEnable = value; + } + inline bool getBindlessSurfaceStateBaseAddressModifyEnable(void) const { + return (TheStructure.Common.BindlessSurfaceStateBaseAddressModifyEnable); + } + inline void setBindlessSurfaceStateMemoryObjectControlStateReserved(const uint64_t value) { + TheStructure.Common.BindlessSurfaceStateMemoryObjectControlState_Reserved = value; + } + inline uint64_t getBindlessSurfaceStateMemoryObjectControlStateReserved(void) const { + return (TheStructure.Common.BindlessSurfaceStateMemoryObjectControlState_Reserved); + } + inline void setBindlessSurfaceStateMemoryObjectControlStateIndexToMocsTables(const uint64_t value) { + 
TheStructure.Common.BindlessSurfaceStateMemoryObjectControlState_IndexToMocsTables = value >> 1; + } + inline uint64_t getBindlessSurfaceStateMemoryObjectControlStateIndexToMocsTables(void) const { + return (TheStructure.Common.BindlessSurfaceStateMemoryObjectControlState_IndexToMocsTables << 1); + } + typedef enum tagBINDLESSSURFACESTATEBASEADDRESS { + BINDLESSSURFACESTATEBASEADDRESS_BIT_SHIFT = 0xc, + BINDLESSSURFACESTATEBASEADDRESS_ALIGN_SIZE = 0x1000, + } BINDLESSSURFACESTATEBASEADDRESS; + inline void setBindlessSurfaceStateBaseAddress(const uint64_t value) { + TheStructure.Common.BindlessSurfaceStateBaseAddress = value >> BINDLESSSURFACESTATEBASEADDRESS_BIT_SHIFT; + } + inline uint64_t getBindlessSurfaceStateBaseAddress(void) const { + return (TheStructure.Common.BindlessSurfaceStateBaseAddress << BINDLESSSURFACESTATEBASEADDRESS_BIT_SHIFT); + } + inline void setBindlessSurfaceStateSize(const uint32_t value) { + TheStructure.Common.BindlessSurfaceStateSize = value; + } + inline uint32_t getBindlessSurfaceStateSize(void) const { + return TheStructure.Common.BindlessSurfaceStateSize; + } + inline void setBindlessSamplerStateBaseAddressModifyEnable(const bool value) { + TheStructure.Common.BindlessSamplerStateBaseAddressModifyEnable = value; + } + inline bool getBindlessSamplerStateBaseAddressModifyEnable(void) const { + return (TheStructure.Common.BindlessSamplerStateBaseAddressModifyEnable); + } + inline void setBindlessSamplerStateMemoryObjectControlStateReserved(const uint64_t value) { + TheStructure.Common.BindlessSamplerStateMemoryObjectControlState_Reserved = value; + } + inline uint64_t getBindlessSamplerStateMemoryObjectControlStateReserved(void) const { + return (TheStructure.Common.BindlessSamplerStateMemoryObjectControlState_Reserved); + } + inline void setBindlessSamplerStateMemoryObjectControlStateIndexToMocsTables(const uint64_t value) { + TheStructure.Common.BindlessSamplerStateMemoryObjectControlState_IndexToMocsTables = value >> 1; + } + inline 
uint64_t getBindlessSamplerStateMemoryObjectControlStateIndexToMocsTables(void) const { + return (TheStructure.Common.BindlessSamplerStateMemoryObjectControlState_IndexToMocsTables << 1); + } + typedef enum tagBINDLESSSAMPLERSTATEBASEADDRESS { + BINDLESSSAMPLERSTATEBASEADDRESS_BIT_SHIFT = 0xc, + BINDLESSSAMPLERSTATEBASEADDRESS_ALIGN_SIZE = 0x1000, + } BINDLESSSAMPLERSTATEBASEADDRESS; + inline void setBindlessSamplerStateBaseAddress(const uint64_t value) { + TheStructure.Common.BindlessSamplerStateBaseAddress = value >> BINDLESSSAMPLERSTATEBASEADDRESS_BIT_SHIFT; + } + inline uint64_t getBindlessSamplerStateBaseAddress(void) const { + return (TheStructure.Common.BindlessSamplerStateBaseAddress << BINDLESSSAMPLERSTATEBASEADDRESS_BIT_SHIFT); + } + inline void setBindlessSamplerStateBufferSize(const uint32_t value) { + TheStructure.Common.BindlessSamplerStateBufferSize = value; + } + inline uint32_t getBindlessSamplerStateBufferSize(void) const { + return (TheStructure.Common.BindlessSamplerStateBufferSize); + } +} STATE_BASE_ADDRESS; +STATIC_ASSERT(88 == sizeof(STATE_BASE_ADDRESS)); + +typedef struct tagMI_REPORT_PERF_COUNT { + /* MI_REPORT_PERF_COUNT command. Common (bit-fields) and RawData (4 dwords) overlay the same storage; the STATIC_ASSERT after the struct pins the size to 16 bytes. */ union tagTheStructure { + struct tagCommon { + /* First 32 bits: command header. BITFIELD_RANGE is defined outside this chunk - presumably (first bit, last bit); confirm. */ uint32_t DwordLength : BITFIELD_RANGE(0, 5); + uint32_t Reserved_6 : BITFIELD_RANGE(6, 22); + uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + /* Next 64 bits: flag bits 0..5, MemoryAddress in bits 6..63. */ uint64_t UseGlobalGtt : BITFIELD_RANGE(0, 0); + uint64_t Reserved_33 : BITFIELD_RANGE(1, 3); + uint64_t CoreModeEnable : BITFIELD_RANGE(4, 4); + uint64_t Reserved_37 : BITFIELD_RANGE(5, 5); + uint64_t MemoryAddress : BITFIELD_RANGE(6, 63); + uint32_t ReportId; + } Common; + uint32_t RawData[4]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_EXCLUDES_DWORD_0_1 = 0x2, + } DWORD_LENGTH; + typedef enum tagMI_COMMAND_OPCODE { + MI_COMMAND_OPCODE_MI_REPORT_PERF_COUNT = 0x28, + } MI_COMMAND_OPCODE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_MI_COMMAND = 0x0, + } COMMAND_TYPE; + typedef enum
tagPATCH_CONSTANTS { + MEMORYADDRESS_BYTEOFFSET = 0x4, + MEMORYADDRESS_INDEX = 0x1, + } PATCH_CONSTANTS; + /* Zero the whole command, then write the fixed header values (length 0x2, opcode 0x28, command type 0). */ inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_EXCLUDES_DWORD_0_1; + TheStructure.Common.MiCommandOpcode = MI_COMMAND_OPCODE_MI_REPORT_PERF_COUNT; + TheStructure.Common.CommandType = COMMAND_TYPE_MI_COMMAND; + } + /* Returns a command initialised by init(). */ static tagMI_REPORT_PERF_COUNT sInit(void) { + MI_REPORT_PERF_COUNT state; + state.init(); + return state; + } + /* Mutable access to raw dword `index` (valid range 0..3). NOTE(review): the check is DEBUG_BREAK_IF here, while MI_SET_PREDICATE in this file uses UNRECOVERABLE_IF - confirm that is intended. */ inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 4); + return TheStructure.RawData[index]; + } + inline void setUseGlobalGtt(const bool value) { + TheStructure.Common.UseGlobalGtt = value; + } + inline bool getUseGlobalGtt(void) const { + return (TheStructure.Common.UseGlobalGtt); + } + inline void setCoreModeEnable(const uint64_t value) { + TheStructure.Common.CoreModeEnable = value; + } + inline uint64_t getCoreModeEnable(void) const { + return (TheStructure.Common.CoreModeEnable); + } + typedef enum tagMEMORYADDRESS { + MEMORYADDRESS_BIT_SHIFT = 0x6, + MEMORYADDRESS_ALIGN_SIZE = 0x40, + } MEMORYADDRESS; + /* The address is stored right-shifted by MEMORYADDRESS_BIT_SHIFT (6), so the low 6 bits of value are discarded; the getter shifts it back. */ inline void setMemoryAddress(const uint64_t value) { + TheStructure.Common.MemoryAddress = value >> MEMORYADDRESS_BIT_SHIFT; + } + inline uint64_t getMemoryAddress(void) const { + return (TheStructure.Common.MemoryAddress << MEMORYADDRESS_BIT_SHIFT); + } + inline void setReportId(const uint32_t value) { + TheStructure.Common.ReportId = value; + } + inline uint32_t getReportId(void) const { + return (TheStructure.Common.ReportId); + } +} MI_REPORT_PERF_COUNT; +STATIC_ASSERT(16 == sizeof(MI_REPORT_PERF_COUNT)); + +struct MI_USER_INTERRUPT { + /* MI_USER_INTERRUPT command: a single dword (RawData[1]). */ union tagTheStructure { + struct tagCommon { + uint32_t Reserved_0 : BITFIELD_RANGE(0, 22); + uint32_t MICommandOpcode : BITFIELD_RANGE(23, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + } Common; + uint32_t RawData[1]; + } TheStructure; + enum MI_COMMAND_OPCODE { +
MI_COMMAND_OPCODE_MI_USER_INTERRUPT = 2, + }; + enum COMMAND_TYPE { + COMMAND_TYPE_MI_COMMAND = 0, + }; + /* Zero the single dword, then write the fixed header. CommandType is already 0 after the memset; it is assigned explicitly so the header setup matches the other MI_* commands in this file. */ inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.MICommandOpcode = MI_COMMAND_OPCODE_MI_USER_INTERRUPT; + TheStructure.Common.CommandType = COMMAND_TYPE_MI_COMMAND; + } + /* Returns a command initialised by init(). */ static MI_USER_INTERRUPT sInit(void) { + MI_USER_INTERRUPT state; + state.init(); + return state; + } + /* Mutable access to raw dword `index`. RawData holds exactly one dword, so any index other than 0 is rejected instead of reading past the command. */ inline uint32_t &getRawData(const uint32_t index) { + UNRECOVERABLE_IF(index >= 1); + return TheStructure.RawData[index]; + } +}; +STATIC_ASSERT(4 == sizeof(MI_USER_INTERRUPT)); + +typedef struct tagMI_SET_PREDICATE { + /* MI_SET_PREDICATE command: a single dword; Common (bit-fields) and RawData overlay the same storage. */ union tagTheStructure { + struct tagCommon { + uint32_t PredicateEnable : BITFIELD_RANGE(0, 3); + uint32_t PredicateEnableWparid : BITFIELD_RANGE(4, 5); + uint32_t Reserved_6 : BITFIELD_RANGE(6, 22); + uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + } Common; + uint32_t RawData[1]; + } TheStructure; + typedef enum tagPREDICATE_ENABLE { + PREDICATE_ENABLE_PREDICATE_DISABLE = 0x0, + PREDICATE_ENABLE_PREDICATE_ON_CLEAR = 0x1, + PREDICATE_ENABLE_PREDICATE_ON_SET = 0x2, + PREDICATE_ENABLE_NOOP_ALWAYS = 0xf, + } PREDICATE_ENABLE; + typedef enum tagPREDICATE_ENABLE_WPARID { + PREDICATE_ENABLE_WPARID_NOOP_NEVER = 0x0, + PREDICATE_ENABLE_WPARID_NOOP_ON_ZERO_VALUE = 0x1, + PREDICATE_ENABLE_WPARID_NOOP_ON_NON_ZERO_VALUE = 0x2, + } PREDICATE_ENABLE_WPARID; + typedef enum tagMI_COMMAND_OPCODE { + MI_COMMAND_OPCODE_MI_SET_PREDICATE = 0x1, + } MI_COMMAND_OPCODE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_MI_COMMAND = 0x0, + } COMMAND_TYPE; + /* Zero the command, then write the default predicate modes and the fixed header. */ inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.PredicateEnable = PREDICATE_ENABLE_PREDICATE_DISABLE; + TheStructure.Common.PredicateEnableWparid = PREDICATE_ENABLE_WPARID_NOOP_NEVER; + TheStructure.Common.MiCommandOpcode = MI_COMMAND_OPCODE_MI_SET_PREDICATE; + TheStructure.Common.CommandType = COMMAND_TYPE_MI_COMMAND; + } + static tagMI_SET_PREDICATE sInit(void) { + MI_SET_PREDICATE state;
+ state.init(); + return state; + } + /* Mutable access to raw dword `index`; only index 0 exists. */ inline uint32_t &getRawData(const uint32_t index) { + UNRECOVERABLE_IF(index >= 1); + return TheStructure.RawData[index]; + } + inline void setPredicateEnable(const PREDICATE_ENABLE value) { + TheStructure.Common.PredicateEnable = value; + } + /* The bit-field reads back as an integer; cast it to the enum type the getter returns. */ inline PREDICATE_ENABLE getPredicateEnable(void) const { + return static_cast<PREDICATE_ENABLE>(TheStructure.Common.PredicateEnable); + } + inline void setPredicateEnableWparid(const PREDICATE_ENABLE_WPARID value) { + TheStructure.Common.PredicateEnableWparid = value; + } + inline PREDICATE_ENABLE_WPARID getPredicateEnableWparid(void) const { + return static_cast<PREDICATE_ENABLE_WPARID>(TheStructure.Common.PredicateEnableWparid); + } +} MI_SET_PREDICATE; +STATIC_ASSERT(4 == sizeof(MI_SET_PREDICATE)); + +typedef struct tagMI_CONDITIONAL_BATCH_BUFFER_END { + /* MI_CONDITIONAL_BATCH_BUFFER_END command: 4 dwords; Common (bit-fields) and RawData overlay the same storage. */ union tagTheStructure { + struct tagCommon { + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t Reserved_8 : BITFIELD_RANGE(8, 11); + uint32_t CompareOperation : BITFIELD_RANGE(12, 14); + uint32_t PredicateEnable : BITFIELD_RANGE(15, 15); + uint32_t Reserved_16 : BITFIELD_RANGE(16, 17); + uint32_t EndCurrentBatchBufferLevel : BITFIELD_RANGE(18, 18); + uint32_t CompareMaskMode : BITFIELD_RANGE(19, 19); + uint32_t Reserved_20 : BITFIELD_RANGE(20, 20); + uint32_t CompareSemaphore : BITFIELD_RANGE(21, 21); + uint32_t UseGlobalGtt : BITFIELD_RANGE(22, 22); + uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + uint32_t CompareDataDword; + uint64_t Reserved_64 : BITFIELD_RANGE(0, 2); + uint64_t CompareAddress : BITFIELD_RANGE(3, 47); + uint64_t CompareAddressReserved_112 : BITFIELD_RANGE(48, 63); + } Common; + uint32_t RawData[4]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_EXCLUDES_DWORD_0_1 = 0x2, + } DWORD_LENGTH; + typedef enum tagCOMPARE_OPERATION { + COMPARE_OPERATION_MAD_GREATER_THAN_IDD = 0x0, + COMPARE_OPERATION_MAD_GREATER_THAN_OR_EQUAL_IDD = 0x1, + COMPARE_OPERATION_MAD_LESS_THAN_IDD = 0x2, +
COMPARE_OPERATION_MAD_LESS_THAN_OR_EQUAL_IDD = 0x3, + COMPARE_OPERATION_MAD_EQUAL_IDD = 0x4, + COMPARE_OPERATION_MAD_NOT_EQUAL_IDD = 0x5, + } COMPARE_OPERATION; + typedef enum tagCOMPARE_MASK_MODE { + COMPARE_MASK_MODE_COMPARE_MASK_MODE_DISABLED = 0x0, + COMPARE_MASK_MODE_COMPARE_MASK_MODE_ENABLED = 0x1, + } COMPARE_MASK_MODE; + typedef enum tagMI_COMMAND_OPCODE { + MI_COMMAND_OPCODE_MI_CONDITIONAL_BATCH_BUFFER_END = 0x36, + } MI_COMMAND_OPCODE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_MI_COMMAND = 0x0, + } COMMAND_TYPE; + /* Zero the command, then write the default compare settings and the fixed header (length 0x2, opcode 0x36, command type 0). */ inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_EXCLUDES_DWORD_0_1; + TheStructure.Common.CompareOperation = COMPARE_OPERATION_MAD_GREATER_THAN_IDD; + TheStructure.Common.CompareMaskMode = COMPARE_MASK_MODE_COMPARE_MASK_MODE_DISABLED; + TheStructure.Common.MiCommandOpcode = MI_COMMAND_OPCODE_MI_CONDITIONAL_BATCH_BUFFER_END; + TheStructure.Common.CommandType = COMMAND_TYPE_MI_COMMAND; + } + /* Returns a command initialised by init(). */ static tagMI_CONDITIONAL_BATCH_BUFFER_END sInit(void) { + MI_CONDITIONAL_BATCH_BUFFER_END state; + state.init(); + return state; + } + /* Mutable access to raw dword `index` (valid range 0..3). */ inline uint32_t &getRawData(const uint32_t index) { + UNRECOVERABLE_IF(index >= 4); + return TheStructure.RawData[index]; + } + inline void setCompareOperation(const COMPARE_OPERATION value) { + TheStructure.Common.CompareOperation = value; + } + /* The bit-field reads back as an integer; cast it to the enum type the getter returns. */ inline COMPARE_OPERATION getCompareOperation(void) const { + return static_cast<COMPARE_OPERATION>(TheStructure.Common.CompareOperation); + } + inline void setPredicateEnable(const bool value) { + TheStructure.Common.PredicateEnable = value; + } + inline bool getPredicateEnable(void) const { + return TheStructure.Common.PredicateEnable; + } + inline void setEndCurrentBatchBufferLevel(const bool value) { + TheStructure.Common.EndCurrentBatchBufferLevel = value; + } + inline bool getEndCurrentBatchBufferLevel(void) const { + return TheStructure.Common.EndCurrentBatchBufferLevel; + } + inline void setCompareMaskMode(const
COMPARE_MASK_MODE value) { + TheStructure.Common.CompareMaskMode = value; + } + inline COMPARE_MASK_MODE getCompareMaskMode(void) const { + return static_cast<COMPARE_MASK_MODE>(TheStructure.Common.CompareMaskMode); + } + inline void setCompareSemaphore(const bool value) { + TheStructure.Common.CompareSemaphore = value; + } + inline bool getCompareSemaphore(void) const { + return TheStructure.Common.CompareSemaphore; + } + inline void setUseGlobalGtt(const bool value) { + TheStructure.Common.UseGlobalGtt = value; + } + inline bool getUseGlobalGtt(void) const { + return TheStructure.Common.UseGlobalGtt; + } + inline void setCompareDataDword(const uint32_t value) { + TheStructure.Common.CompareDataDword = value; + } + inline uint32_t getCompareDataDword(void) const { + return TheStructure.Common.CompareDataDword; + } + typedef enum tagCOMPAREADDRESS { + COMPAREADDRESS_BIT_SHIFT = 0x3, + COMPAREADDRESS_ALIGN_SIZE = 0x8, + } COMPAREADDRESS; + /* The address is stored right-shifted by COMPAREADDRESS_BIT_SHIFT (3), so the low 3 bits of value are discarded; the getter shifts it back. */ inline void setCompareAddress(const uint64_t value) { + TheStructure.Common.CompareAddress = value >> COMPAREADDRESS_BIT_SHIFT; + } + inline uint64_t getCompareAddress(void) const { + return TheStructure.Common.CompareAddress << COMPAREADDRESS_BIT_SHIFT; + } +} MI_CONDITIONAL_BATCH_BUFFER_END; +STATIC_ASSERT(16 == sizeof(MI_CONDITIONAL_BATCH_BUFFER_END)); + +struct XY_BLOCK_COPY_BLT { + union tagTheStructure { + struct tagCommon { + /// DWORD 0 + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t Reserved_8 : BITFIELD_RANGE(8, 8); + uint32_t NumberofMultisamples : BITFIELD_RANGE(9, 11); + uint32_t SpecialModeofOperation : BITFIELD_RANGE(12, 13); + uint32_t Reserved_14 : BITFIELD_RANGE(14, 18); + uint32_t ColorDepth : BITFIELD_RANGE(19, 21); + uint32_t InstructionTarget_Opcode : BITFIELD_RANGE(22, 28); + uint32_t Client : BITFIELD_RANGE(29, 31); + + /// DWORD 1 + uint32_t DestinationPitch : BITFIELD_RANGE(0, 17); + uint32_t DestinationAuxiliarysurfacemode : BITFIELD_RANGE(18, 20); + uint32_t DestinationMOCSvalue : BITFIELD_RANGE(21, 27); +
uint32_t DestinationCompressionType : BITFIELD_RANGE(28, 28); + uint32_t DestinationCompressionEnable : BITFIELD_RANGE(29, 29); + uint32_t DestinationTiling : BITFIELD_RANGE(30, 31); + + /// DWORD 2 + uint32_t DestinationX1Coordinate_Left : BITFIELD_RANGE(0, 15); + uint32_t DestinationY1Coordinate_Top : BITFIELD_RANGE(16, 31); + + /// DWORD 3 + uint32_t DestinationX2Coordinate_Right : BITFIELD_RANGE(0, 15); + uint32_t DestinationY2Coordinate_Bottom : BITFIELD_RANGE(16, 31); + + /// DWORD 4..5 + uint64_t DestinationBaseAddress; + + /// DWORD 6 + uint32_t DestinationXoffset : BITFIELD_RANGE(0, 13); + uint32_t Reserved_206 : BITFIELD_RANGE(14, 15); + uint32_t DestinationYoffset : BITFIELD_RANGE(16, 29); + uint32_t Reserved_222 : BITFIELD_RANGE(30, 30); + uint32_t DestinationTargetMemory : BITFIELD_RANGE(31, 31); + + /// DWORD 7 + uint32_t SourceX1Coordinate_Left : BITFIELD_RANGE(0, 15); + uint32_t SourceY1Coordinate_Top : BITFIELD_RANGE(16, 31); + + /// DWORD 8 + uint32_t SourcePitch : BITFIELD_RANGE(0, 17); + uint32_t SourceAuxiliarysurfacemode : BITFIELD_RANGE(18, 20); + uint32_t SourceMOCS : BITFIELD_RANGE(21, 27); + uint32_t SourceCompressionType : BITFIELD_RANGE(28, 28); + uint32_t SourceCompressionEnable : BITFIELD_RANGE(29, 29); + uint32_t SourceTiling : BITFIELD_RANGE(30, 31); + + /// DWORD 9..10 + uint64_t SourceBaseAddress; + + /// DWORD 11 + uint32_t SourceXoffset : BITFIELD_RANGE(0, 13); + uint32_t Reserved_366 : BITFIELD_RANGE(14, 15); + uint32_t SourceYoffset : BITFIELD_RANGE(16, 29); + uint32_t Reserved_382 : BITFIELD_RANGE(30, 30); + uint32_t SourceTargetMemory : BITFIELD_RANGE(31, 31); + + /// DWORD 12 + /* SourceClearAddressLow holds the value shifted right by CLEARADDRESSLOW_BIT_SHIFT (6); see setSourceClearAddressLow. */ uint32_t SourceCompressionFormat : BITFIELD_RANGE(0, 4); + uint32_t SourceClearValueEnable : BITFIELD_RANGE(5, 5); + uint32_t SourceClearAddressLow : BITFIELD_RANGE(6, 31); + + /// DWORD 13 + uint32_t SourceClearAddressHigh : BITFIELD_RANGE(0, 15); + uint32_t Reserved_432 : BITFIELD_RANGE(16, 31); + + /// DWORD 14 + uint32_t
DestinationCompressionFormat : BITFIELD_RANGE(0, 4); + uint32_t DestinationClearValueEnable : BITFIELD_RANGE(5, 5); + uint32_t DestinationClearAddressLow : BITFIELD_RANGE(6, 31); + + /// DWORD 15 + uint32_t DestinationClearAddressHigh : BITFIELD_RANGE(0, 15); + uint32_t Reserved_496 : BITFIELD_RANGE(16, 31); + + /// DWORD 16 + /* Height and Width are written by their setters as value - 1 and read back as field + 1. */ uint32_t DestinationSurfaceHeight : BITFIELD_RANGE(0, 13); + uint32_t DestinationSurfaceWidth : BITFIELD_RANGE(14, 27); + uint32_t Reserved_540 : BITFIELD_RANGE(28, 28); + uint32_t DestinationSurfaceType : BITFIELD_RANGE(29, 31); + + /// DWORD 17 + uint32_t DestinationLOD : BITFIELD_RANGE(0, 3); + uint32_t DestinationSurfaceQpitch : BITFIELD_RANGE(4, 20); + uint32_t DestinationSurfaceDepth : BITFIELD_RANGE(21, 31); + + /// DWORD 18 + uint32_t DestinationHorizontalAlign : BITFIELD_RANGE(0, 1); + uint32_t Reserved_578 : BITFIELD_RANGE(2, 2); + uint32_t DestinationVerticalAlign : BITFIELD_RANGE(3, 4); + uint32_t DestinationSSID : BITFIELD_RANGE(5, 7); + uint32_t DestinationMipTailStartLOD : BITFIELD_RANGE(8, 11); + uint32_t Reserved_588 : BITFIELD_RANGE(12, 17); + uint32_t DestinationDepthStencilResource : BITFIELD_RANGE(18, 18); + uint32_t Reserved_595 : BITFIELD_RANGE(19, 20); + uint32_t DestinationArrayIndex : BITFIELD_RANGE(21, 31); + + /// DWORD 19 + uint32_t SourceSurfaceHeight : BITFIELD_RANGE(0, 13); + uint32_t SourceSurfaceWidth : BITFIELD_RANGE(14, 27); + uint32_t Reserved_636 : BITFIELD_RANGE(28, 28); + uint32_t SourceSurfaceType : BITFIELD_RANGE(29, 31); + + /// DWORD 20 + uint32_t SourceLOD : BITFIELD_RANGE(0, 3); + uint32_t SourceSurfaceQpitch : BITFIELD_RANGE(4, 20); + uint32_t SourceSurfaceDepth : BITFIELD_RANGE(21, 31); + + /// DWORD 21 + uint32_t SourceHorizontalAlign : BITFIELD_RANGE(0, 1); + uint32_t Reserved_674 : BITFIELD_RANGE(2, 2); + uint32_t SourceVerticalAlign : BITFIELD_RANGE(3, 4); + uint32_t SourceSSID : BITFIELD_RANGE(5, 7); + uint32_t SourceMipTailStartLOD : BITFIELD_RANGE(8, 11); + uint32_t Reserved_684 :
BITFIELD_RANGE(12, 17); + uint32_t SourceDepthStencilResource : BITFIELD_RANGE(18, 18); + uint32_t Reserved_691 : BITFIELD_RANGE(19, 20); + uint32_t SourceArrayIndex : BITFIELD_RANGE(21, 31); + + } Common; + uint32_t RawData[22]; + } TheStructure; + + /* Value init() writes into DwordLength: 20, i.e. the 22 dwords of RawData minus 2, as the enumerator name EXCLUDES_DWORD_0_1 suggests. */ enum DWORD_LENGTH { + DWORD_LENGTH_EXCLUDES_DWORD_0_1 = 20, + }; + + enum NUMBER_OF_MULTISAMPLES { + NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_1 = 0, + NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_2 = 1, + NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_4 = 2, + NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_8 = 3, + NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_16 = 4, + }; + + enum SPECIAL_MODE_OF_OPERATION { + SPECIAL_MODE_OF_OPERATION_NONE = 0, + SPECIAL_MODE_OF_OPERATION_FULL_RESOLVE = 1, + SPECIAL_MODE_OF_OPERATION_PARTIAL_RESOLVE = 2, + }; + + enum COLOR_DEPTH { + COLOR_DEPTH_8_BIT_COLOR = 0, + COLOR_DEPTH_16_BIT_COLOR = 1, + COLOR_DEPTH_32_BIT_COLOR = 2, + COLOR_DEPTH_64_BIT_COLOR = 3, + COLOR_DEPTH_96_BIT_COLOR_ONLY_LINEAR_CASE_IS_SUPPORTED = 4, + COLOR_DEPTH_128_BIT_COLOR = 5, + }; + + enum CLIENT { + CLIENT_2D_PROCESSOR = 2, + }; + + /* The enums from here to SURFACE_TYPE are each used by both the Source* and the Destination* accessors of this struct. */ enum AUXILIARY_SURFACE_MODE { + AUXILIARY_SURFACE_MODE_AUX_NONE = 0, + AUXILIARY_SURFACE_MODE_AUX_CCS_E = 5, + }; + + enum COMPRESSION_TYPE { + COMPRESSION_TYPE_3D_COMPRESSION = 0, + COMPRESSION_TYPE_MEDIA_COMPRESSION = 1, + }; + + enum COMPRESSION_ENABLE { + COMPRESSION_ENABLE_COMPRESSION_DISABLE = 0, + COMPRESSION_ENABLE_COMPRESSION_ENABLE = 1, + }; + + enum TILING { + TILING_LINEAR = 0, + TILING_TILE4 = 2, + TILING_TILE64 = 3, + }; + + enum TARGET_MEMORY { + TARGET_MEMORY_LOCAL_MEM = 0, + TARGET_MEMORY_SYSTEM_MEM = 1, + }; + + enum CLEAR_VALUE_ENABLE { + CLEAR_VALUE_ENABLE_DISABLE = 0, + CLEAR_VALUE_ENABLE_ENABLE = 1, + }; + + enum SURFACE_TYPE { + SURFACE_TYPE_SURFTYPE_1D = 0, + SURFACE_TYPE_SURFTYPE_2D = 1, + SURFACE_TYPE_SURFTYPE_3D = 2, + SURFACE_TYPE_SURFTYPE_CUBE = 3, + SURFACE_TYPE_SURFTYPE_BUFFER = 4, + SURFACE_TYPE_SURFTYPE_STRBUF = 5, + SURFACE_TYPE_SURFTYPE_NULL = 7, + }; + + enum
INSTRUCTIONTARGET_OPCODE { + INSTRUCTIONTARGET_OPCODE_OPCODE = 0x41, + }; + + /* Zero the whole command, then write the fixed header: dword length 20, opcode 0x41, client 2 (2D processor). */ inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH::DWORD_LENGTH_EXCLUDES_DWORD_0_1; + TheStructure.Common.InstructionTarget_Opcode = INSTRUCTIONTARGET_OPCODE_OPCODE; + TheStructure.Common.Client = CLIENT::CLIENT_2D_PROCESSOR; + } + + /* Returns a command initialised by init(). */ static XY_BLOCK_COPY_BLT sInit(void) { + XY_BLOCK_COPY_BLT state; + state.init(); + return state; + } + + /* Mutable access to raw dword `index`. RawData has 22 dwords (indices 0..21); the guard must fire for indices past the end, not for the valid ones. */ inline uint32_t &getRawData(const uint32_t index) { + UNRECOVERABLE_IF(index >= 22); + return TheStructure.RawData[index]; + } + + inline void setNumberofMultisamples(const NUMBER_OF_MULTISAMPLES value) { + TheStructure.Common.NumberofMultisamples = value; + } + + /* Enum getters: the bit-field reads back as an integer and is cast to the enum type the getter returns. */ inline NUMBER_OF_MULTISAMPLES getNumberofMultisamples(void) const { + return static_cast<NUMBER_OF_MULTISAMPLES>(TheStructure.Common.NumberofMultisamples); + } + + inline void setSpecialModeofOperation(const SPECIAL_MODE_OF_OPERATION value) { + TheStructure.Common.SpecialModeofOperation = value; + } + + inline SPECIAL_MODE_OF_OPERATION getSpecialModeofOperation(void) const { + return static_cast<SPECIAL_MODE_OF_OPERATION>(TheStructure.Common.SpecialModeofOperation); + } + + inline void setColorDepth(const COLOR_DEPTH value) { + TheStructure.Common.ColorDepth = value; + } + + inline COLOR_DEPTH getColorDepth(void) const { + return static_cast<COLOR_DEPTH>(TheStructure.Common.ColorDepth); + } + + inline void setInstructionTargetOpcode(const uint32_t value) { + TheStructure.Common.InstructionTarget_Opcode = value; + } + + inline uint32_t getInstructionTargetOpcode(void) const { + return (TheStructure.Common.InstructionTarget_Opcode); + } + + inline void setClient(const CLIENT value) { + TheStructure.Common.Client = value; + } + + inline CLIENT getClient(void) const { + return static_cast<CLIENT>(TheStructure.Common.Client); + } + + /* The pitch is stored as value - 1; the getter adds 1 back. */ inline void setDestinationPitch(const uint32_t value) { + TheStructure.Common.DestinationPitch = value - 1; + } + + inline uint32_t getDestinationPitch(void) const { +
return (TheStructure.Common.DestinationPitch + 1); + } + + inline void setDestinationAuxiliarysurfacemode(const AUXILIARY_SURFACE_MODE value) { + TheStructure.Common.DestinationAuxiliarysurfacemode = value; + } + + /* Enum getters: the bit-field reads back as an integer and is cast to the enum type the getter returns. */ inline AUXILIARY_SURFACE_MODE getDestinationAuxiliarysurfacemode(void) const { + return static_cast<AUXILIARY_SURFACE_MODE>(TheStructure.Common.DestinationAuxiliarysurfacemode); + } + + inline void setDestinationMOCSvalue(const uint32_t value) { + TheStructure.Common.DestinationMOCSvalue = value; + } + + inline uint32_t getDestinationMOCSvalue(void) const { + return (TheStructure.Common.DestinationMOCSvalue); + } + + inline void setDestinationCompressionType(const COMPRESSION_TYPE value) { + TheStructure.Common.DestinationCompressionType = value; + } + + inline COMPRESSION_TYPE getDestinationCompressionType(void) const { + return static_cast<COMPRESSION_TYPE>(TheStructure.Common.DestinationCompressionType); + } + + inline void setDestinationCompressionEnable(const COMPRESSION_ENABLE value) { + TheStructure.Common.DestinationCompressionEnable = value; + } + + inline COMPRESSION_ENABLE getDestinationCompressionEnable(void) const { + return static_cast<COMPRESSION_ENABLE>(TheStructure.Common.DestinationCompressionEnable); + } + + inline void setDestinationTiling(const TILING value) { + TheStructure.Common.DestinationTiling = value; + } + + inline TILING getDestinationTiling(void) const { + return static_cast<TILING>(TheStructure.Common.DestinationTiling); + } + + inline void setDestinationX1CoordinateLeft(const uint32_t value) { + TheStructure.Common.DestinationX1Coordinate_Left = value; + } + + inline uint32_t getDestinationX1CoordinateLeft(void) const { + return (TheStructure.Common.DestinationX1Coordinate_Left); + } + + inline void setDestinationY1CoordinateTop(const uint32_t value) { + TheStructure.Common.DestinationY1Coordinate_Top = value; + } + + inline uint32_t getDestinationY1CoordinateTop(void) const { + return (TheStructure.Common.DestinationY1Coordinate_Top); + } + + inline void
setDestinationX2CoordinateRight(const uint32_t value) { + TheStructure.Common.DestinationX2Coordinate_Right = value; + } + + inline uint32_t getDestinationX2CoordinateRight(void) const { + return (TheStructure.Common.DestinationX2Coordinate_Right); + } + + inline void setDestinationY2CoordinateBottom(const uint32_t value) { + TheStructure.Common.DestinationY2Coordinate_Bottom = value; + } + + inline uint32_t getDestinationY2CoordinateBottom(void) const { + return (TheStructure.Common.DestinationY2Coordinate_Bottom); + } + + /* The base address is a plain 64-bit member, stored without any shift. */ inline void setDestinationBaseAddress(const uint64_t value) { + TheStructure.Common.DestinationBaseAddress = value; + } + + inline uint64_t getDestinationBaseAddress(void) const { + return (TheStructure.Common.DestinationBaseAddress); + } + + inline void setDestinationXoffset(const uint32_t value) { + TheStructure.Common.DestinationXoffset = value; + } + + inline uint32_t getDestinationXoffset(void) const { + return (TheStructure.Common.DestinationXoffset); + } + + inline void setDestinationYoffset(const uint32_t value) { + TheStructure.Common.DestinationYoffset = value; + } + + inline uint32_t getDestinationYoffset(void) const { + return (TheStructure.Common.DestinationYoffset); + } + + inline void setDestinationTargetMemory(const TARGET_MEMORY value) { + TheStructure.Common.DestinationTargetMemory = value; + } + + inline TARGET_MEMORY getDestinationTargetMemory(void) const { + return static_cast<TARGET_MEMORY>(TheStructure.Common.DestinationTargetMemory); + } + + inline void setSourceX1CoordinateLeft(const uint32_t value) { + TheStructure.Common.SourceX1Coordinate_Left = value; + } + + inline uint32_t getSourceX1CoordinateLeft(void) const { + return (TheStructure.Common.SourceX1Coordinate_Left); + } + + inline void setSourceY1CoordinateTop(const uint32_t value) { + TheStructure.Common.SourceY1Coordinate_Top = value; + } + + inline uint32_t getSourceY1CoordinateTop(void) const { + return (TheStructure.Common.SourceY1Coordinate_Top); + } + + /* The pitch is stored as value - 1; the getter adds 1 back. */ inline void
setSourcePitch(const uint32_t value) { + TheStructure.Common.SourcePitch = value - 1; + } + + inline uint32_t getSourcePitch(void) const { + return (TheStructure.Common.SourcePitch + 1); + } + + inline void setSourceAuxiliarysurfacemode(const AUXILIARY_SURFACE_MODE value) { + TheStructure.Common.SourceAuxiliarysurfacemode = value; + } + + inline AUXILIARY_SURFACE_MODE getSourceAuxiliarysurfacemode(void) const { + return static_cast<AUXILIARY_SURFACE_MODE>(TheStructure.Common.SourceAuxiliarysurfacemode); + } + + inline void setSourceMOCS(const uint32_t value) { + TheStructure.Common.SourceMOCS = value; + } + + inline uint32_t getSourceMOCS(void) const { + return (TheStructure.Common.SourceMOCS); + } + + inline void setSourceCompressionType(const COMPRESSION_TYPE value) { + TheStructure.Common.SourceCompressionType = value; + } + + inline COMPRESSION_TYPE getSourceCompressionType(void) const { + return static_cast<COMPRESSION_TYPE>(TheStructure.Common.SourceCompressionType); + } + + inline void setSourceCompressionEnable(const COMPRESSION_ENABLE value) { + TheStructure.Common.SourceCompressionEnable = value; + } + + inline COMPRESSION_ENABLE getSourceCompressionEnable(void) const { + return static_cast<COMPRESSION_ENABLE>(TheStructure.Common.SourceCompressionEnable); + } + + inline void setSourceTiling(const TILING value) { + TheStructure.Common.SourceTiling = value; + } + + inline TILING getSourceTiling(void) const { + return static_cast<TILING>(TheStructure.Common.SourceTiling); + } + + inline void setSourceBaseAddress(const uint64_t value) { + TheStructure.Common.SourceBaseAddress = value; + } + + inline uint64_t getSourceBaseAddress(void) const { + return (TheStructure.Common.SourceBaseAddress); + } + + inline void setSourceXoffset(const uint32_t value) { + TheStructure.Common.SourceXoffset = value; + } + + inline uint32_t getSourceXoffset(void) const { + return (TheStructure.Common.SourceXoffset); + } + + inline void setSourceYoffset(const uint32_t value) { + TheStructure.Common.SourceYoffset = value; + } + + inline uint32_t
getSourceYoffset(void) const { + return (TheStructure.Common.SourceYoffset); + } + + inline void setSourceTargetMemory(const TARGET_MEMORY value) { + TheStructure.Common.SourceTargetMemory = value; + } + + /* Enum getters: the bit-field reads back as an integer and is cast to the enum type the getter returns. */ inline TARGET_MEMORY getSourceTargetMemory(void) const { + return static_cast<TARGET_MEMORY>(TheStructure.Common.SourceTargetMemory); + } + + inline void setSourceCompressionFormat(const uint32_t value) { + TheStructure.Common.SourceCompressionFormat = value; + } + + inline uint32_t getSourceCompressionFormat(void) const { + return (TheStructure.Common.SourceCompressionFormat); + } + + inline void setSourceClearValueEnable(const CLEAR_VALUE_ENABLE value) { + TheStructure.Common.SourceClearValueEnable = value; + } + + inline CLEAR_VALUE_ENABLE getSourceClearValueEnable(void) const { + return static_cast<CLEAR_VALUE_ENABLE>(TheStructure.Common.SourceClearValueEnable); + } + + typedef enum tagCLEARADDRESSLOW { + CLEARADDRESSLOW_BIT_SHIFT = 6, + CLEARADDRESSLOW_ALIGN_SIZE = 64, + } CLEARADDRESSLOW; + + /* The low clear-address part is stored right-shifted by CLEARADDRESSLOW_BIT_SHIFT (6), so the low 6 bits of value are discarded; the getter shifts it back. The same shift is used for the destination field. */ inline void setSourceClearAddressLow(const uint32_t value) { + TheStructure.Common.SourceClearAddressLow = value >> CLEARADDRESSLOW_BIT_SHIFT; + } + + inline uint32_t getSourceClearAddressLow(void) const { + return (TheStructure.Common.SourceClearAddressLow << CLEARADDRESSLOW_BIT_SHIFT); + } + + inline void setSourceClearAddressHigh(const uint32_t value) { + TheStructure.Common.SourceClearAddressHigh = value; + } + + inline uint32_t getSourceClearAddressHigh(void) const { + return (TheStructure.Common.SourceClearAddressHigh); + } + + inline void setDestinationCompressionFormat(const uint32_t value) { + TheStructure.Common.DestinationCompressionFormat = value; + } + + inline uint32_t getDestinationCompressionFormat(void) const { + return (TheStructure.Common.DestinationCompressionFormat); + } + + inline void setDestinationClearValueEnable(const CLEAR_VALUE_ENABLE value) { + TheStructure.Common.DestinationClearValueEnable = value; + } + + inline CLEAR_VALUE_ENABLE getDestinationClearValueEnable(void)
const { + return static_cast<CLEAR_VALUE_ENABLE>(TheStructure.Common.DestinationClearValueEnable); + } + + inline void setDestinationClearAddressLow(const uint32_t value) { + TheStructure.Common.DestinationClearAddressLow = value >> CLEARADDRESSLOW_BIT_SHIFT; + } + + inline uint32_t getDestinationClearAddressLow(void) const { + return (TheStructure.Common.DestinationClearAddressLow << CLEARADDRESSLOW_BIT_SHIFT); + } + + inline void setDestinationClearAddressHigh(const uint32_t value) { + TheStructure.Common.DestinationClearAddressHigh = value; + } + + inline uint32_t getDestinationClearAddressHigh(void) const { + return (TheStructure.Common.DestinationClearAddressHigh); + } + + /* Surface height, width and depth are stored as value - 1; the getters add 1 back. */ inline void setDestinationSurfaceHeight(const uint32_t value) { + TheStructure.Common.DestinationSurfaceHeight = value - 1; + } + + inline uint32_t getDestinationSurfaceHeight(void) const { + return (TheStructure.Common.DestinationSurfaceHeight + 1); + } + + inline void setDestinationSurfaceWidth(const uint32_t value) { + TheStructure.Common.DestinationSurfaceWidth = value - 1; + } + + inline uint32_t getDestinationSurfaceWidth(void) const { + return (TheStructure.Common.DestinationSurfaceWidth + 1); + } + + inline void setDestinationSurfaceType(const SURFACE_TYPE value) { + TheStructure.Common.DestinationSurfaceType = value; + } + + inline SURFACE_TYPE getDestinationSurfaceType(void) const { + return static_cast<SURFACE_TYPE>(TheStructure.Common.DestinationSurfaceType); + } + + inline void setDestinationLOD(const uint32_t value) { + TheStructure.Common.DestinationLOD = value; + } + + inline uint32_t getDestinationLOD(void) const { + return (TheStructure.Common.DestinationLOD); + } + + inline void setDestinationSurfaceQpitch(const uint32_t value) { + TheStructure.Common.DestinationSurfaceQpitch = value; + } + + inline uint32_t getDestinationSurfaceQpitch(void) const { + return (TheStructure.Common.DestinationSurfaceQpitch); + } + + inline void setDestinationSurfaceDepth(const uint32_t value) { +
TheStructure.Common.DestinationSurfaceDepth = value - 1; + } + + inline uint32_t getDestinationSurfaceDepth(void) const { + return (TheStructure.Common.DestinationSurfaceDepth + 1); + } + + inline void setDestinationHorizontalAlign(const uint32_t value) { + TheStructure.Common.DestinationHorizontalAlign = value; + } + + inline uint32_t getDestinationHorizontalAlign(void) const { + return (TheStructure.Common.DestinationHorizontalAlign); + } + + inline void setDestinationVerticalAlign(const uint32_t value) { + TheStructure.Common.DestinationVerticalAlign = value; + } + + inline uint32_t getDestinationVerticalAlign(void) const { + return (TheStructure.Common.DestinationVerticalAlign); + } + + inline void setDestinationSSID(const uint32_t value) { + TheStructure.Common.DestinationSSID = value; + } + + inline uint32_t getDestinationSSID(void) const { + return (TheStructure.Common.DestinationSSID); + } + + inline void setDestinationMipTailStartLOD(const uint32_t value) { + TheStructure.Common.DestinationMipTailStartLOD = value; + } + + inline uint32_t getDestinationMipTailStartLOD(void) const { + return (TheStructure.Common.DestinationMipTailStartLOD); + } + + inline void setDestinationDepthStencilResource(const uint32_t value) { + TheStructure.Common.DestinationDepthStencilResource = value; + } + + inline uint32_t getDestinationDepthStencilResource(void) const { + return (TheStructure.Common.DestinationDepthStencilResource); + } + + /* The array index is stored as value - 1; the getter adds 1 back. */ inline void setDestinationArrayIndex(const uint32_t value) { + TheStructure.Common.DestinationArrayIndex = value - 1; + } + + inline uint32_t getDestinationArrayIndex(void) const { + return (TheStructure.Common.DestinationArrayIndex + 1); + } + + /* Surface height, width and depth are stored as value - 1; the getters add 1 back. */ inline void setSourceSurfaceHeight(const uint32_t value) { + TheStructure.Common.SourceSurfaceHeight = value - 1; + } + + inline uint32_t getSourceSurfaceHeight(void) const { + return (TheStructure.Common.SourceSurfaceHeight + 1); + } + + inline void setSourceSurfaceWidth(const uint32_t value) { +
TheStructure.Common.SourceSurfaceWidth = value - 1; + } + + inline uint32_t getSourceSurfaceWidth(void) const { + return (TheStructure.Common.SourceSurfaceWidth + 1); + } + + inline void setSourceSurfaceType(const SURFACE_TYPE value) { + TheStructure.Common.SourceSurfaceType = value; + } + + /* The bit-field reads back as an integer; cast it to the enum type the getter returns. */ inline SURFACE_TYPE getSourceSurfaceType(void) const { + return static_cast<SURFACE_TYPE>(TheStructure.Common.SourceSurfaceType); + } + + inline void setSourceLOD(const uint32_t value) { + TheStructure.Common.SourceLOD = value; + } + + inline uint32_t getSourceLOD(void) const { + return (TheStructure.Common.SourceLOD); + } + + inline void setSourceSurfaceQpitch(const uint32_t value) { + TheStructure.Common.SourceSurfaceQpitch = value; + } + + inline uint32_t getSourceSurfaceQpitch(void) const { + return (TheStructure.Common.SourceSurfaceQpitch); + } + + inline void setSourceSurfaceDepth(const uint32_t value) { + TheStructure.Common.SourceSurfaceDepth = value - 1; + } + + inline uint32_t getSourceSurfaceDepth(void) const { + return (TheStructure.Common.SourceSurfaceDepth + 1); + } + + inline void setSourceHorizontalAlign(const uint32_t value) { + TheStructure.Common.SourceHorizontalAlign = value; + } + + inline uint32_t getSourceHorizontalAlign(void) const { + return (TheStructure.Common.SourceHorizontalAlign); + } + + inline void setSourceVerticalAlign(const uint32_t value) { + TheStructure.Common.SourceVerticalAlign = value; + } + + inline uint32_t getSourceVerticalAlign(void) const { + return (TheStructure.Common.SourceVerticalAlign); + } + + inline void setSourceSSID(const uint32_t value) { + TheStructure.Common.SourceSSID = value; + } + + inline uint32_t getSourceSSID(void) const { + return (TheStructure.Common.SourceSSID); + } + + inline void setSourceMipTailStartLOD(const uint32_t value) { + TheStructure.Common.SourceMipTailStartLOD = value; + } + + inline uint32_t getSourceMipTailStartLOD(void) const { + return (TheStructure.Common.SourceMipTailStartLOD); + } + + inline void
setSourceDepthStencilResource(const uint32_t value) { + TheStructure.Common.SourceDepthStencilResource = value; + } + + inline uint32_t getSourceDepthStencilResource(void) const { + return (TheStructure.Common.SourceDepthStencilResource); + } + + /* The array index is stored as value - 1; the getter adds 1 back. */ inline void setSourceArrayIndex(const uint32_t value) { + TheStructure.Common.SourceArrayIndex = value - 1; + } + + inline uint32_t getSourceArrayIndex(void) const { + return (TheStructure.Common.SourceArrayIndex + 1); + } +}; +STATIC_ASSERT(88 == sizeof(XY_BLOCK_COPY_BLT)); + +struct XY_FAST_COLOR_BLT { + union tagTheStructure { + struct tagCommon { + /// DWORD 0 + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t Reserved_8 : BITFIELD_RANGE(8, 8); + uint32_t NumberofMultisamples : BITFIELD_RANGE(9, 11); + uint32_t SpecialModeofOperation : BITFIELD_RANGE(12, 13); + uint32_t Reserved_14 : BITFIELD_RANGE(14, 18); + uint32_t ColorDepth : BITFIELD_RANGE(19, 21); + uint32_t InstructionTarget_Opcode : BITFIELD_RANGE(22, 28); + uint32_t Client : BITFIELD_RANGE(29, 31); + + /// DWORD 1 + uint32_t DestinationPitch : BITFIELD_RANGE(0, 17); + uint32_t DestinationAuxiliarysurfacemode : BITFIELD_RANGE(18, 20); + uint32_t DestinationMOCSvalue : BITFIELD_RANGE(21, 27); + uint32_t DestinationCompressionType : BITFIELD_RANGE(28, 28); + uint32_t DestinationCompressionEnable : BITFIELD_RANGE(29, 29); + uint32_t DestinationTiling : BITFIELD_RANGE(30, 31); + + /// DWORD 2 + uint32_t DestinationX1Coordinate_Left : BITFIELD_RANGE(0, 15); + uint32_t DestinationY1Coordinate_Top : BITFIELD_RANGE(16, 31); + + /// DWORD 3 + uint32_t DestinationX2Coordinate_Right : BITFIELD_RANGE(0, 15); + uint32_t DestinationY2Coordinate_Bottom : BITFIELD_RANGE(16, 31); + + /// DWORD 4..5 + uint64_t DestinationBaseAddress; + + /// DWORD 6 + uint32_t DestinationXoffset : BITFIELD_RANGE(0, 13); + uint32_t Reserved_206 : BITFIELD_RANGE(14, 15); + uint32_t DestinationYoffset : BITFIELD_RANGE(16, 29); + uint32_t Reserved_222 : BITFIELD_RANGE(30, 30); + uint32_t
DestinationTargetMemory : BITFIELD_RANGE(31, 31); + + /// DWORD 7 - 10 + uint32_t FillColor[4]; + + // DWORD 11 + uint32_t DestinationCompressionFormat : BITFIELD_RANGE(0, 4); + uint32_t DestinationClearValueEnable : BITFIELD_RANGE(5, 5); + uint32_t DestinationClearAddressLow : BITFIELD_RANGE(6, 31); + + // DWORD 12 + uint32_t DestinationClearAddressHigh : BITFIELD_RANGE(0, 15); + uint32_t Reserved1 : BITFIELD_RANGE(16, 31); + + // DWORD 13 + uint32_t DestinationSurfaceHeight : BITFIELD_RANGE(0, 13); + uint32_t DestinationSurfaceWidth : BITFIELD_RANGE(14, 27); + uint32_t Reserved2 : BITFIELD_RANGE(28, 28); + uint32_t DestinationSurfaceType : BITFIELD_RANGE(29, 31); + + // DWORD 14 + uint32_t DestinationLOD : BITFIELD_RANGE(0, 3); + uint32_t DestinationSurfaceQpitch : BITFIELD_RANGE(4, 18); + uint32_t Reserved3 : BITFIELD_RANGE(19, 20); + uint32_t DestinationSurfaceDepth : BITFIELD_RANGE(21, 31); + + // DWORD 15 + uint32_t DestinationHorizontalAlign : BITFIELD_RANGE(0, 1); + uint32_t Reserved4 : BITFIELD_RANGE(2, 2); + uint32_t DestinationVerticalAlign : BITFIELD_RANGE(3, 4); + uint32_t Reserved5 : BITFIELD_RANGE(5, 7); + uint32_t DestinationMipTailStartLOD : BITFIELD_RANGE(8, 11); + uint32_t Reserved6 : BITFIELD_RANGE(12, 17); + uint32_t DestinationDepthStencilResource : BITFIELD_RANGE(18, 18); + uint32_t Reserved7 : BITFIELD_RANGE(19, 20); + uint32_t DestinationArrayIndex : BITFIELD_RANGE(21, 31); + } Common; + uint32_t RawData[15]; + } TheStructure; + + enum DWORD_LENGTH { + DWORD_LENGTH_EXCLUDES_DWORD_0_1 = 0xE, + }; + + enum NUMBER_OF_MULTISAMPLES { + NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_1 = 0, + NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_2 = 1, + NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_4 = 2, + NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_8 = 3, + NUMBER_OF_MULTISAMPLES_MULTISAMPLECOUNT_16 = 4, + }; + + enum SPECIAL_MODE_OF_OPERATION { + SPECIAL_MODE_OF_OPERATION_NONE = 0, + SPECIAL_MODE_OF_OPERATION_FULL_RESOLVE = 1, + SPECIAL_MODE_OF_OPERATION_PARTIAL_RESOLVE = 
2, + }; + + enum COLOR_DEPTH { + COLOR_DEPTH_8_BIT_COLOR = 0, + COLOR_DEPTH_16_BIT_COLOR = 1, + COLOR_DEPTH_32_BIT_COLOR = 2, + COLOR_DEPTH_64_BIT_COLOR = 3, + COLOR_DEPTH_96_BIT_COLOR_ONLY_LINEAR_CASE_IS_SUPPORTED = 4, + COLOR_DEPTH_128_BIT_COLOR = 5, + }; + + enum CLIENT { + CLIENT_2D_PROCESSOR = 2, + }; + + enum DESTINATION_AUXILIARY_SURFACE_MODE { + DESTINATION_AUXILIARY_SURFACE_MODE_AUX_NONE = 0, + DESTINATION_AUXILIARY_SURFACE_MODE_AUX_CCS_E = 5, + }; + + enum DESTINATION_CLEAR_VALUE_ENABLE { + DESTINATION_CLEAR_VALUE_ENABLE_DISABLE = 0, + DESTINATION_CLEAR_VALUE_ENABLE_ENABLE = 1, + }; + + enum DESTINATION_COMPRESSION_TYPE { + DESTINATION_COMPRESSION_TYPE_3D_COMPRESSION = 0, + DESTINATION_COMPRESSION_TYPE_MEDIA_COMPRESSION = 1, + }; + + enum DESTINATION_COMPRESSION_ENABLE { + DESTINATION_COMPRESSION_ENABLE_COMPRESSION_DISABLE = 0, + DESTINATION_COMPRESSION_ENABLE_COMPRESSION_ENABLE = 1, + }; + + enum DESTINATION_TILING { + DESTINATION_TILING_LINEAR = 0, + DESTINATION_TILING_TILE4 = 2, + DESTINATION_TILING_TILE64 = 3, + }; + + enum DESTINATION_TARGET_MEMORY { + DESTINATION_TARGET_MEMORY_LOCAL_MEM = 0, + DESTINATION_TARGET_MEMORY_SYSTEM_MEM = 1, + }; + + enum DESTINATION_SURFACE_TYPE { + DESTINATION_SURFACE_TYPE_1D = 0, + DESTINATION_SURFACE_TYPE_2D = 1, + DESTINATION_SURFACE_TYPE_3D = 2, + DESTINATION_SURFACE_TYPE_CUBE = 3, + }; + enum INSTRUCTIONTARGET_OPCODE { + INSTRUCTIONTARGET_OPCODE_OPCODE = 0x44, + }; + + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH::DWORD_LENGTH_EXCLUDES_DWORD_0_1; + TheStructure.Common.InstructionTarget_Opcode = INSTRUCTIONTARGET_OPCODE::INSTRUCTIONTARGET_OPCODE_OPCODE; + TheStructure.Common.Client = CLIENT::CLIENT_2D_PROCESSOR; + } + + static XY_FAST_COLOR_BLT sInit(void) { + XY_FAST_COLOR_BLT state; + state.init(); + return state; + } + + inline uint32_t &getRawData(const uint32_t index) { + UNRECOVERABLE_IF(index < 22); + return 
TheStructure.RawData[index]; + } + + inline void setNumberofMultisamples(const NUMBER_OF_MULTISAMPLES value) { + TheStructure.Common.NumberofMultisamples = value; + } + + inline NUMBER_OF_MULTISAMPLES getNumberofMultisamples(void) const { + return static_cast(TheStructure.Common.NumberofMultisamples); + } + + inline void setSpecialModeofOperation(const SPECIAL_MODE_OF_OPERATION value) { + TheStructure.Common.SpecialModeofOperation = value; + } + + inline SPECIAL_MODE_OF_OPERATION getSpecialModeofOperation(void) const { + return static_cast(TheStructure.Common.SpecialModeofOperation); + } + + inline void setColorDepth(const COLOR_DEPTH value) { + TheStructure.Common.ColorDepth = value; + } + + inline COLOR_DEPTH getColorDepth(void) const { + return static_cast(TheStructure.Common.ColorDepth); + } + + inline void setInstructionTargetOpcode(const uint32_t value) { + TheStructure.Common.InstructionTarget_Opcode = value; + } + + inline uint32_t getInstructionTargetOpcode(void) const { + return (TheStructure.Common.InstructionTarget_Opcode); + } + + inline void setClient(const CLIENT value) { + TheStructure.Common.Client = value; + } + + inline CLIENT getClient(void) const { + return static_cast(TheStructure.Common.Client); + } + + inline void setDestinationPitch(const uint32_t value) { + TheStructure.Common.DestinationPitch = value - 1; + } + + inline uint32_t getDestinationPitch(void) const { + return (TheStructure.Common.DestinationPitch + 1); + } + + inline void setDestinationAuxiliarysurfacemode(const DESTINATION_AUXILIARY_SURFACE_MODE value) { + TheStructure.Common.DestinationAuxiliarysurfacemode = value; + } + + inline DESTINATION_AUXILIARY_SURFACE_MODE getDestinationAuxiliarysurfacemode(void) const { + return static_cast(TheStructure.Common.DestinationAuxiliarysurfacemode); + } + + inline void setDestinationMOCSvalue(const uint32_t value) { + TheStructure.Common.DestinationMOCSvalue = value; + } + + inline uint32_t getDestinationMOCSvalue(void) const { + return 
(TheStructure.Common.DestinationMOCSvalue); + } + + inline void setDestinationCompressionType(const DESTINATION_COMPRESSION_TYPE value) { + TheStructure.Common.DestinationCompressionType = value; + } + + inline DESTINATION_COMPRESSION_TYPE getDestinationCompressionType(void) const { + return static_cast(TheStructure.Common.DestinationCompressionType); + } + + inline void setDestinationCompressionEnable(const DESTINATION_COMPRESSION_ENABLE value) { + TheStructure.Common.DestinationCompressionEnable = value; + } + + inline DESTINATION_COMPRESSION_ENABLE getDestinationCompressionEnable(void) const { + return static_cast(TheStructure.Common.DestinationCompressionEnable); + } + + inline void setDestinationTiling(const DESTINATION_TILING value) { + TheStructure.Common.DestinationTiling = value; + } + + inline DESTINATION_TILING getDestinationTiling(void) const { + return static_cast(TheStructure.Common.DestinationTiling); + } + + inline void setDestinationX1CoordinateLeft(const uint32_t value) { + TheStructure.Common.DestinationX1Coordinate_Left = value; + } + + inline uint32_t getDestinationX1CoordinateLeft(void) const { + return (TheStructure.Common.DestinationX1Coordinate_Left); + } + + inline void setDestinationY1CoordinateTop(const uint32_t value) { + TheStructure.Common.DestinationY1Coordinate_Top = value; + } + + inline uint32_t getDestinationY1CoordinateTop(void) const { + return (TheStructure.Common.DestinationY1Coordinate_Top); + } + + inline void setDestinationX2CoordinateRight(const uint32_t value) { + TheStructure.Common.DestinationX2Coordinate_Right = value; + } + + inline uint32_t getDestinationX2CoordinateRight(void) const { + return (TheStructure.Common.DestinationX2Coordinate_Right); + } + + inline void setDestinationY2CoordinateBottom(const uint32_t value) { + TheStructure.Common.DestinationY2Coordinate_Bottom = value; + } + + inline uint32_t getDestinationY2CoordinateBottom(void) const { + return (TheStructure.Common.DestinationY2Coordinate_Bottom); + 
} + + inline void setDestinationBaseAddress(const uint64_t value) { + TheStructure.Common.DestinationBaseAddress = value; + } + + inline uint64_t getDestinationBaseAddress(void) const { + return (TheStructure.Common.DestinationBaseAddress); + } + + inline void setDestinationXoffset(const uint32_t value) { + TheStructure.Common.DestinationXoffset = value; + } + + inline uint32_t getDestinationXoffset(void) const { + return (TheStructure.Common.DestinationXoffset); + } + + inline void setDestinationYoffset(const uint32_t value) { + TheStructure.Common.DestinationYoffset = value; + } + + inline uint32_t getDestinationYoffset(void) const { + return (TheStructure.Common.DestinationYoffset); + } + + inline void setDestinationTargetMemory(const DESTINATION_TARGET_MEMORY value) { + TheStructure.Common.DestinationTargetMemory = value; + } + + inline DESTINATION_TARGET_MEMORY getDestinationTargetMemory(void) const { + return static_cast(TheStructure.Common.DestinationTargetMemory); + } + + inline void setFillColor(const uint32_t *value) { + TheStructure.Common.FillColor[0] = value[0]; + TheStructure.Common.FillColor[1] = value[1]; + TheStructure.Common.FillColor[2] = value[2]; + TheStructure.Common.FillColor[3] = value[3]; + } + + inline void setDestinationCompressionFormat(const uint32_t value) { + TheStructure.Common.DestinationCompressionFormat = value; + } + + inline uint32_t getDestinationCompressionFormat(void) const { + return (TheStructure.Common.DestinationCompressionFormat); + } + + inline void setDestinationClearValueEnable(const DESTINATION_CLEAR_VALUE_ENABLE value) { + TheStructure.Common.DestinationClearValueEnable = value; + } + + inline DESTINATION_CLEAR_VALUE_ENABLE getDestinationClearValueEnable(void) const { + return static_cast(TheStructure.Common.DestinationClearValueEnable); + } + + typedef enum tagDESTINATIONCLEARADDRESSLOW { + DESTINATIONCLEARADDRESSLOW_BIT_SHIFT = 6, + DESTINATIONCLEARADDRESSLOW_ALIGN_SIZE = 64, + } DESTINATIONCLEARADDRESSLOW; + + 
inline void setDestinationClearAddressLow(const uint32_t value) { + TheStructure.Common.DestinationClearAddressLow = value >> DESTINATIONCLEARADDRESSLOW_BIT_SHIFT; + } + + inline uint32_t getDestinationClearAddressLow(void) const { + return (TheStructure.Common.DestinationClearAddressLow << DESTINATIONCLEARADDRESSLOW_BIT_SHIFT); + } + + inline void setDestinationClearAddressHigh(const uint32_t value) { + TheStructure.Common.DestinationClearAddressHigh = value; + } + + inline uint32_t getDestinationClearAddressHigh(void) const { + return (TheStructure.Common.DestinationClearAddressHigh); + } + + inline void setDestinationSurfaceHeight(const uint32_t value) { + TheStructure.Common.DestinationSurfaceHeight = value - 1; + } + + inline uint32_t getDestinationSurfaceHeight(void) const { + return (TheStructure.Common.DestinationSurfaceHeight + 1); + } + + inline void setDestinationSurfaceWidth(const uint32_t value) { + TheStructure.Common.DestinationSurfaceWidth = value - 1; + } + + inline uint32_t getDestinationSurfaceWidth(void) const { + return (TheStructure.Common.DestinationSurfaceWidth + 1); + } + + inline void setDestinationSurfaceType(const DESTINATION_SURFACE_TYPE value) { + TheStructure.Common.DestinationSurfaceType = value; + } + + inline DESTINATION_SURFACE_TYPE getDestinationSurfaceType(void) const { + return static_cast(TheStructure.Common.DestinationSurfaceType); + } + + inline void setDestinationLOD(const uint32_t value) { + TheStructure.Common.DestinationLOD = value; + } + + inline uint32_t getDestinationLOD(void) const { + return (TheStructure.Common.DestinationLOD); + } + + inline void setDestinationSurfaceQpitch(const uint32_t value) { + TheStructure.Common.DestinationSurfaceQpitch = value; + } + + inline uint32_t getDestinationSurfaceQpitch(void) const { + return (TheStructure.Common.DestinationSurfaceQpitch); + } + + inline void setDestinationSurfaceDepth(const uint32_t value) { + TheStructure.Common.DestinationSurfaceDepth = value; + } + + inline 
uint32_t getDestinationSurfaceDepth(void) const { + return (TheStructure.Common.DestinationSurfaceDepth); + } + + inline void setDestinationHorizontalAlign(const uint32_t value) { + TheStructure.Common.DestinationHorizontalAlign = value; + } + + inline uint32_t getDestinationHorizontalAlign(void) const { + return (TheStructure.Common.DestinationHorizontalAlign); + } + + inline void setDestinationVerticalAlign(const uint32_t value) { + TheStructure.Common.DestinationVerticalAlign = value; + } + + inline uint32_t getDestinationVerticalAlign(void) const { + return (TheStructure.Common.DestinationVerticalAlign); + } + + inline void setDestinationMipTailStartLOD(const uint32_t value) { + TheStructure.Common.DestinationMipTailStartLOD = value; + } + + inline uint32_t getDestinationMipTailStartLOD(void) const { + return (TheStructure.Common.DestinationMipTailStartLOD); + } + + inline void setDestinationDepthStencilResource(const uint32_t value) { + TheStructure.Common.DestinationDepthStencilResource = value; + } + + inline uint32_t getDestinationDepthStencilResource(void) const { + return (TheStructure.Common.DestinationDepthStencilResource); + } + + inline void setDestinationArrayIndex(const uint32_t value) { + TheStructure.Common.DestinationArrayIndex = value - 1; + } + + inline uint32_t getDestinationArrayIndex(void) const { + return (TheStructure.Common.DestinationArrayIndex + 1); + } +}; + +STATIC_ASSERT(64 == sizeof(XY_FAST_COLOR_BLT)); + +struct MI_FLUSH_DW { + union tagData { + struct tagCommon { + /// DWORD 0 + uint32_t DwordLength : BITFIELD_RANGE(0, 5); + uint32_t Reserved_6 : BITFIELD_RANGE(6, 7); + uint32_t NotifyEnable : BITFIELD_RANGE(8, 8); + uint32_t FlushLlc : BITFIELD_RANGE(9, 9); + uint32_t Reserved_10 : BITFIELD_RANGE(10, 13); + uint32_t PostSyncOperation : BITFIELD_RANGE(14, 15); + uint32_t FlushCcs : BITFIELD_RANGE(16, 16); + uint32_t Reserved_17 : BITFIELD_RANGE(17, 17); + uint32_t TlbInvalidate : BITFIELD_RANGE(18, 18); + uint32_t Reserved_19 : 
BITFIELD_RANGE(19, 20); + uint32_t StoreDataIndex : BITFIELD_RANGE(21, 21); + uint32_t Reserved_22 : BITFIELD_RANGE(22, 22); + uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + + /// DWORD 1..2 + uint64_t Reserved_32 : BITFIELD_RANGE(0, 1); + uint64_t DestinationAddress : BITFIELD_RANGE(2, 47); + uint64_t Reserved_80 : BITFIELD_RANGE(48, 63); + + /// DWORD 3..4 + uint64_t ImmediateData; + + } Common; + uint32_t RawData[5]; + } TheStructure; + + enum DWORD_LENGTH { + DWORD_LENGTH_EXCLUDES_DWORD_0_1 = 3, + }; + + enum POST_SYNC_OPERATION { + POST_SYNC_OPERATION_NO_WRITE = 0, + POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD = 1, + POST_SYNC_OPERATION_WRITE_TIMESTAMP_REGISTER = 3, + }; + + enum MI_COMMAND_OPCODE { + MI_COMMAND_OPCODE_MI_FLUSH_DW = 38, + }; + + enum COMMAND_TYPE { + COMMAND_TYPE_MI_COMMAND = 0, + }; + + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH::DWORD_LENGTH_EXCLUDES_DWORD_0_1; + TheStructure.Common.MiCommandOpcode = MI_COMMAND_OPCODE::MI_COMMAND_OPCODE_MI_FLUSH_DW; + } + + static MI_FLUSH_DW sInit(void) { + MI_FLUSH_DW state; + state.init(); + return state; + } + + inline uint32_t &getRawData(const uint32_t index) { + UNRECOVERABLE_IF(index < 5); + return TheStructure.RawData[index]; + } + + inline void setNotifyEnable(const uint32_t value) { + TheStructure.Common.NotifyEnable = value; + } + + inline uint32_t getNotifyEnable(void) const { + return (TheStructure.Common.NotifyEnable); + } + + inline void setFlushLlc(const uint32_t value) { + TheStructure.Common.FlushLlc = value; + } + + inline uint32_t getFlushLlc(void) const { + return (TheStructure.Common.FlushLlc); + } + + inline void setFlushCcs(const uint32_t value) { + TheStructure.Common.FlushCcs = value; + } + + inline uint32_t getFlushCcs(void) const { + return (TheStructure.Common.FlushCcs); + } + + inline void setPostSyncOperation(const POST_SYNC_OPERATION 
value) { + TheStructure.Common.PostSyncOperation = value; + } + + inline POST_SYNC_OPERATION getPostSyncOperation(void) const { + return static_cast(TheStructure.Common.PostSyncOperation); + } + + inline void setTlbInvalidate(const uint32_t value) { + TheStructure.Common.TlbInvalidate = value; + } + + inline uint32_t getTlbInvalidate(void) const { + return (TheStructure.Common.TlbInvalidate); + } + + inline void setStoreDataIndex(const uint32_t value) { + TheStructure.Common.StoreDataIndex = value; + } + + inline uint32_t getStoreDataIndex(void) const { + return (TheStructure.Common.StoreDataIndex); + } + + typedef enum tagDESTINATIONADDRESS { + DESTINATIONADDRESS_BIT_SHIFT = 2, + DESTINATIONADDRESS_ALIGN_SIZE = 4, + } DESTINATIONADDRESS; + + inline void setDestinationAddress(const uint64_t value) { + TheStructure.Common.DestinationAddress = value >> DESTINATIONADDRESS_BIT_SHIFT; + } + + inline uint64_t getDestinationAddress(void) const { + return (TheStructure.Common.DestinationAddress << DESTINATIONADDRESS_BIT_SHIFT); + } + + inline void setImmediateData(const uint64_t value) { + TheStructure.Common.ImmediateData = value; + } + + inline uint64_t getImmediateData(void) const { + return (TheStructure.Common.ImmediateData); + } +}; +STATIC_ASSERT(20 == sizeof(MI_FLUSH_DW)); + +typedef struct tag_3DSTATE_BTD_BODY { + union tagTheStructure { + struct tagCommon { + // DWORD 0 + uint32_t DispatchTimeoutCounter : BITFIELD_RANGE(0, 1); + uint32_t Reserved_2 : BITFIELD_RANGE(2, 2); + uint32_t AmfsMode : BITFIELD_RANGE(3, 4); + uint32_t Reserved_5 : BITFIELD_RANGE(5, 31); + // DWORD 1 + uint64_t PerDssMemoryBackedBufferSize : BITFIELD_RANGE(0, 2); + uint64_t Reserved_35 : BITFIELD_RANGE(3, 9); + uint64_t MemoryBackedBufferBasePointer : BITFIELD_RANGE(10, 63); + // DWORD 3 + uint64_t PerThreadScratchSpace : BITFIELD_RANGE(0, 3); + uint64_t Reserved_100 : BITFIELD_RANGE(4, 9); + uint64_t BtdScratchSpaceBasePointer : BITFIELD_RANGE(10, 31); + // DWORD 4 + uint64_t Reserved_128 
: BITFIELD_RANGE(32, 63); + } Common; + uint32_t RawData[5]; + } TheStructure; + typedef enum tagAMFS_MODE { + AMFS_MODE_NORMAL_MODE = 0x0, + AMFS_MODE_TOUCH_MODE = 0x1, + AMFS_MODE_BACKFILL_MODE = 0x2, + AMFS_MODE_FALLBACK_MODE = 0x3, + } AMFS_MODE; + typedef enum tagPER_DSS_MEMORY_BACKED_BUFFER_SIZE { + PER_DSS_MEMORY_BACKED_BUFFER_SIZE_2KB = 0x0, + PER_DSS_MEMORY_BACKED_BUFFER_SIZE_4KB = 0x1, + PER_DSS_MEMORY_BACKED_BUFFER_SIZE_8KB = 0x2, + PER_DSS_MEMORY_BACKED_BUFFER_SIZE_16KB = 0x3, + PER_DSS_MEMORY_BACKED_BUFFER_SIZE_32KB = 0x4, + PER_DSS_MEMORY_BACKED_BUFFER_SIZE_64KB = 0x5, + PER_DSS_MEMORY_BACKED_BUFFER_SIZE_128KB = 0x6, + } PER_DSS_MEMORY_BACKED_BUFFER_SIZE; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.AmfsMode = AMFS_MODE_NORMAL_MODE; + TheStructure.Common.PerDssMemoryBackedBufferSize = PER_DSS_MEMORY_BACKED_BUFFER_SIZE_128KB; + } + static tag_3DSTATE_BTD_BODY sInit(void) { + _3DSTATE_BTD_BODY state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + UNRECOVERABLE_IF(index >= 5); + return TheStructure.RawData[index]; + } + inline void setDispatchTimeoutCounter(const uint32_t value) { + UNRECOVERABLE_IF(value > 0x3); + TheStructure.Common.DispatchTimeoutCounter = value; + } + inline uint32_t getDispatchTimeoutCounter(void) const { + return TheStructure.Common.DispatchTimeoutCounter; + } + inline void setAmfsMode(const AMFS_MODE value) { + TheStructure.Common.AmfsMode = value; + } + inline AMFS_MODE getAmfsMode(void) const { + return static_cast(TheStructure.Common.AmfsMode); + } + inline void setPerDssMemoryBackedBufferSize(const PER_DSS_MEMORY_BACKED_BUFFER_SIZE value) { + TheStructure.Common.PerDssMemoryBackedBufferSize = value; + } + inline PER_DSS_MEMORY_BACKED_BUFFER_SIZE getPerDssMemoryBackedBufferSize(void) const { + return static_cast(TheStructure.Common.PerDssMemoryBackedBufferSize); + } + typedef enum tagMEMORYBACKEDBUFFERBASEPOINTER { + 
MEMORYBACKEDBUFFERBASEPOINTER_BIT_SHIFT = 0xa, + MEMORYBACKEDBUFFERBASEPOINTER_ALIGN_SIZE = 0x400, + } MEMORYBACKEDBUFFERBASEPOINTER; + inline void setMemoryBackedBufferBasePointer(const uint64_t value) { + UNRECOVERABLE_IF(value > 0xffffffffffffffffL); + TheStructure.Common.MemoryBackedBufferBasePointer = value >> MEMORYBACKEDBUFFERBASEPOINTER_BIT_SHIFT; + } + inline uint64_t getMemoryBackedBufferBasePointer(void) const { + return TheStructure.Common.MemoryBackedBufferBasePointer << MEMORYBACKEDBUFFERBASEPOINTER_BIT_SHIFT; + } + inline void setPerThreadScratchSpace(const uint64_t value) { + UNRECOVERABLE_IF(value > 0xfL); + TheStructure.Common.PerThreadScratchSpace = value; + } + inline uint64_t getPerThreadScratchSpace(void) const { + return TheStructure.Common.PerThreadScratchSpace; + } + typedef enum tagBTDSCRATCHSPACEBASEPOINTER { + BTDSCRATCHSPACEBASEPOINTER_BIT_SHIFT = 0xa, + BTDSCRATCHSPACEBASEPOINTER_ALIGN_SIZE = 0x400, + } BTDSCRATCHSPACEBASEPOINTER; + inline void setBtdScratchSpaceBasePointer(const uint64_t value) { + UNRECOVERABLE_IF(value > 0xffffffffL); + TheStructure.Common.BtdScratchSpaceBasePointer = value >> BTDSCRATCHSPACEBASEPOINTER_BIT_SHIFT; + } + inline uint64_t getBtdScratchSpaceBasePointer(void) const { + return TheStructure.Common.BtdScratchSpaceBasePointer << BTDSCRATCHSPACEBASEPOINTER_BIT_SHIFT; + } +} _3DSTATE_BTD_BODY; +STATIC_ASSERT(20 == sizeof(_3DSTATE_BTD_BODY)); + +typedef struct tag_3DSTATE_BTD { + union tagTheStructure { + struct tagCommon { + // DWORD 0 + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t Reserved_8 : BITFIELD_RANGE(8, 15); + uint32_t _3DCommandSubOpcode : BITFIELD_RANGE(16, 23); + uint32_t _3DCommandOpcode : BITFIELD_RANGE(24, 26); + uint32_t CommandSubtype : BITFIELD_RANGE(27, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + // DWORD 1 + _3DSTATE_BTD_BODY BtdStateBody; + } Common; + uint32_t RawData[6]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_DWORD_COUNT_N = 0x4, + } 
DWORD_LENGTH; + typedef enum tag_3D_COMMAND_SUB_OPCODE { + _3D_COMMAND_SUB_OPCODE_3DSTATE_BTD = 0x6, + } _3D_COMMAND_SUB_OPCODE; + typedef enum tag_3D_COMMAND_OPCODE { + _3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED = 0x1, + } _3D_COMMAND_OPCODE; + typedef enum tagCOMMAND_SUBTYPE { + COMMAND_SUBTYPE_GFXPIPE_COMMON = 0x0, + } COMMAND_SUBTYPE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_GFXPIPE = 0x3, + } COMMAND_TYPE; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_DWORD_COUNT_N; + TheStructure.Common._3DCommandSubOpcode = _3D_COMMAND_SUB_OPCODE_3DSTATE_BTD; + TheStructure.Common._3DCommandOpcode = _3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED; + TheStructure.Common.CommandSubtype = COMMAND_SUBTYPE_GFXPIPE_COMMON; + TheStructure.Common.CommandType = COMMAND_TYPE_GFXPIPE; + TheStructure.Common.BtdStateBody.init(); + } + static tag_3DSTATE_BTD sInit(void) { + _3DSTATE_BTD state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + UNRECOVERABLE_IF(index >= 6); + return TheStructure.RawData[index]; + } + inline void setBtdStateBody(const _3DSTATE_BTD_BODY &value) { + TheStructure.Common.BtdStateBody = value; + } + inline _3DSTATE_BTD_BODY &getBtdStateBody(void) { + return TheStructure.Common.BtdStateBody; + } +} _3DSTATE_BTD; +STATIC_ASSERT(24 == sizeof(_3DSTATE_BTD)); +STATIC_ASSERT(std::is_pod<_3DSTATE_BTD>::value); + +typedef struct tagGRF { + union tagTheStructure { + float fRegs[8]; + uint32_t dwRegs[8]; + uint16_t wRegs[16]; + uint32_t RawData[8]; + } TheStructure; +} GRF; +STATIC_ASSERT(32 == sizeof(GRF)); + +typedef struct tagPOSTSYNC_DATA { + union tagTheStructure { + struct tagCommon { + // DWORD 0 + uint32_t Operation : BITFIELD_RANGE(0, 1); + uint32_t DataportPipelineFlush : BITFIELD_RANGE(2, 2); + uint32_t L3Flush : BITFIELD_RANGE(3, 3); + uint32_t MocsReserved_4 : BITFIELD_RANGE(4, 4); + uint32_t MocsIndexToMocsTables : BITFIELD_RANGE(5, 10); + 
uint32_t Reserved_13 : BITFIELD_RANGE(11, 31); + // DWORD 1 + uint64_t DestinationAddress; + // DWORD 3 + uint64_t ImmediateData; + } Common; + uint32_t RawData[5]; + } TheStructure; + typedef enum tagOPERATION { + OPERATION_NO_WRITE = 0x0, + OPERATION_WRITE_IMMEDIATE_DATA = 0x1, + OPERATION_WRITE_TIMESTAMP = 0x3, + } OPERATION; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.Operation = OPERATION_NO_WRITE; + } + static tagPOSTSYNC_DATA sInit(void) { + POSTSYNC_DATA state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + UNRECOVERABLE_IF(index >= 5); + return TheStructure.RawData[index]; + } + inline void setOperation(const OPERATION value) { + TheStructure.Common.Operation = value; + } + inline OPERATION getOperation(void) const { + return static_cast(TheStructure.Common.Operation); + } + inline void setDataportPipelineFlush(const bool value) { + TheStructure.Common.DataportPipelineFlush = value; + } + inline bool getDataportPipelineFlush(void) const { + return TheStructure.Common.DataportPipelineFlush; + } + inline void setL3Flush(const bool value) { + TheStructure.Common.L3Flush = value; + } + inline bool getL3Flush(void) const { + return TheStructure.Common.L3Flush; + } + inline void setMocs(const uint32_t value) { // patched + UNRECOVERABLE_IF(value > 0x7f); + TheStructure.Common.MocsReserved_4 = value; + TheStructure.Common.MocsIndexToMocsTables = value >> 1; + } + inline uint32_t getMocs(void) const { // patched + return (TheStructure.Common.MocsIndexToMocsTables << 1) | TheStructure.Common.MocsReserved_4; + } + inline void setDestinationAddress(const uint64_t value) { + TheStructure.Common.DestinationAddress = value; + } + inline uint64_t getDestinationAddress(void) const { + return TheStructure.Common.DestinationAddress; + } + inline void setImmediateData(const uint64_t value) { + TheStructure.Common.ImmediateData = value; + } + inline uint64_t 
getImmediateData(void) const { + return TheStructure.Common.ImmediateData; + } +} POSTSYNC_DATA; +STATIC_ASSERT(20 == sizeof(POSTSYNC_DATA)); + +typedef struct tagINTERFACE_DESCRIPTOR_DATA { + union tagTheStructure { + struct tagCommon { + uint32_t Reserved_0_0_5 : BITFIELD_RANGE(0, 5); + uint32_t KernelStartPointer : BITFIELD_RANGE(6, 31); + + uint32_t KernelStartPointerHigh : BITFIELD_RANGE(0, 15); + uint32_t Reserved_1_16_31 : BITFIELD_RANGE(16, 31); + + uint32_t Reserved_2_0_6 : BITFIELD_RANGE(0, 6); + uint32_t SoftwareExceptionEnable : BITFIELD_RANGE(7, 7); + uint32_t Reserved_2_8_10 : BITFIELD_RANGE(8, 10); + uint32_t MaskStackExceptionEnable : BITFIELD_RANGE(11, 11); + uint32_t Reserved_2_12_12 : BITFIELD_RANGE(12, 12); + uint32_t IllegalOpcodeExceptionEnable : BITFIELD_RANGE(13, 13); + uint32_t Reserved_2_14_15 : BITFIELD_RANGE(14, 15); + uint32_t FloatingPointMode : BITFIELD_RANGE(16, 16); + uint32_t Reserved_2_17_17 : BITFIELD_RANGE(17, 17); + uint32_t SingleProgramFlow : BITFIELD_RANGE(18, 18); + uint32_t DenormMode : BITFIELD_RANGE(19, 19); + uint32_t ThreadPreemptionDisable : BITFIELD_RANGE(20, 20); + uint32_t Reserved_2_21_31 : BITFIELD_RANGE(21, 31); + + uint32_t Reserved_3_0_1 : BITFIELD_RANGE(0, 1); + uint32_t SamplerCount : BITFIELD_RANGE(2, 4); + uint32_t SamplerStatePointer : BITFIELD_RANGE(5, 31); + + uint32_t BindingTableEntryCount : BITFIELD_RANGE(0, 4); + uint32_t BindingTablePointer : BITFIELD_RANGE(5, 20); + uint32_t Reserved_4_21_31 : BITFIELD_RANGE(21, 31); + + uint32_t NumberOfThreadsInGpgpuThreadGroup : BITFIELD_RANGE(0, 9); + uint32_t Reserved_5_10_15 : BITFIELD_RANGE(10, 15); + uint32_t SharedLocalMemorySize : BITFIELD_RANGE(16, 20); + uint32_t BarrierEnable : BITFIELD_RANGE(21, 21); + uint32_t RoundingMode : BITFIELD_RANGE(22, 23); + uint32_t Reserved_5_24_25 : BITFIELD_RANGE(24, 25); + uint32_t ThreadGroupDispatchSize : BITFIELD_RANGE(26, 27); + uint32_t Reserved_5_28_31 : BITFIELD_RANGE(28, 31); + + uint32_t Reserved_6_0_31 : 
BITFIELD_RANGE(0, 31); + + uint32_t Reserved_7; + } Common; + uint32_t RawData[8]; + } TheStructure; + typedef enum tagFLOATING_POINT_MODE { + FLOATING_POINT_MODE_IEEE_754 = 0x0, + FLOATING_POINT_MODE_ALTERNATE = 0x1, + } FLOATING_POINT_MODE; + typedef enum tagSINGLE_PROGRAM_FLOW { + SINGLE_PROGRAM_FLOW_MULTIPLE = 0x0, + SINGLE_PROGRAM_FLOW_SINGLE = 0x1, + } SINGLE_PROGRAM_FLOW; + typedef enum tagDENORM_MODE { + DENORM_MODE_FTZ = 0x0, + DENORM_MODE_SETBYKERNEL = 0x1, + } DENORM_MODE; + typedef enum tagTHREAD_PREEMPTION_DISABLE { + THREAD_PREEMPTION_DISABLE_DISABLE = 0x0, + THREAD_PREEMPTION_DISABLE_ENABLE = 0x1, + } THREAD_PREEMPTION_DISABLE; + typedef enum tagSAMPLER_COUNT { + SAMPLER_COUNT_NO_SAMPLERS_USED = 0x0, + SAMPLER_COUNT_BETWEEN_1_AND_4_SAMPLERS_USED = 0x1, + SAMPLER_COUNT_BETWEEN_5_AND_8_SAMPLERS_USED = 0x2, + SAMPLER_COUNT_BETWEEN_9_AND_12_SAMPLERS_USED = 0x3, + SAMPLER_COUNT_BETWEEN_13_AND_16_SAMPLERS_USED = 0x4, + } SAMPLER_COUNT; + typedef enum tagSHARED_LOCAL_MEMORY_SIZE { + SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K = 0x0, + SHARED_LOCAL_MEMORY_SIZE_ENCODES_1K = 0x1, + SHARED_LOCAL_MEMORY_SIZE_ENCODES_2K = 0x2, + SHARED_LOCAL_MEMORY_SIZE_ENCODES_4K = 0x3, + SHARED_LOCAL_MEMORY_SIZE_ENCODES_8K = 0x4, + SHARED_LOCAL_MEMORY_SIZE_ENCODES_16K = 0x5, + SHARED_LOCAL_MEMORY_SIZE_ENCODES_32K = 0x6, + SHARED_LOCAL_MEMORY_SIZE_ENCODES_64K = 0x7, + } SHARED_LOCAL_MEMORY_SIZE; + typedef enum tagROUNDING_MODE { + ROUNDING_MODE_RTNE = 0x0, + ROUNDING_MODE_RU = 0x1, + ROUNDING_MODE_RD = 0x2, + ROUNDING_MODE_RTZ = 0x3, + } ROUNDING_MODE; + + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.FloatingPointMode = FLOATING_POINT_MODE_IEEE_754; + TheStructure.Common.SingleProgramFlow = SINGLE_PROGRAM_FLOW_MULTIPLE; + TheStructure.Common.DenormMode = DENORM_MODE_FTZ; + TheStructure.Common.ThreadPreemptionDisable = + THREAD_PREEMPTION_DISABLE_DISABLE; + TheStructure.Common.SamplerCount = SAMPLER_COUNT_NO_SAMPLERS_USED; + 
TheStructure.Common.SharedLocalMemorySize = + SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K; + TheStructure.Common.RoundingMode = ROUNDING_MODE_RTNE; + } + static tagINTERFACE_DESCRIPTOR_DATA sInit(void) { + INTERFACE_DESCRIPTOR_DATA state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 8); + return TheStructure.RawData[index]; + } + typedef enum tagKERNELSTARTPOINTER { + KERNELSTARTPOINTER_BIT_SHIFT = 0x6, + KERNELSTARTPOINTER_ALIGN_SIZE = 0x40, + } KERNELSTARTPOINTER; + inline void setKernelStartPointer(const uint64_t value) { + DEBUG_BREAK_IF(value >= 0x100000000); + TheStructure.Common.KernelStartPointer = (uint32_t)value >> KERNELSTARTPOINTER_BIT_SHIFT; + } + inline uint32_t getKernelStartPointer(void) const { + return (TheStructure.Common.KernelStartPointer << KERNELSTARTPOINTER_BIT_SHIFT); + } + inline void setKernelStartPointerHigh(const uint32_t value) { + TheStructure.Common.KernelStartPointerHigh = value; + } + inline uint32_t getKernelStartPointerHigh(void) const { + return (TheStructure.Common.KernelStartPointerHigh); + } + inline void setSoftwareExceptionEnable(const uint32_t value) { + TheStructure.Common.SoftwareExceptionEnable = value; + } + inline uint32_t getSoftwareExceptionEnable(void) const { + return (TheStructure.Common.SoftwareExceptionEnable); + } + inline void setMaskStackExceptionEnable(const uint32_t value) { + TheStructure.Common.MaskStackExceptionEnable = value; + } + inline uint32_t getMaskStackExceptionEnable(void) const { + return (TheStructure.Common.MaskStackExceptionEnable); + } + inline void setIllegalOpcodeExceptionEnable(const uint32_t value) { + TheStructure.Common.IllegalOpcodeExceptionEnable = value; + } + inline uint32_t getIllegalOpcodeExceptionEnable(void) const { + return (TheStructure.Common.IllegalOpcodeExceptionEnable); + } + inline void setFloatingPointMode(const FLOATING_POINT_MODE value) { + TheStructure.Common.FloatingPointMode = value; + } + inline 
FLOATING_POINT_MODE getFloatingPointMode(void) const { + return static_cast(TheStructure.Common.FloatingPointMode); + } + inline void setSingleProgramFlow(const SINGLE_PROGRAM_FLOW value) { + TheStructure.Common.SingleProgramFlow = value; + } + inline SINGLE_PROGRAM_FLOW getSingleProgramFlow(void) const { + return static_cast(TheStructure.Common.SingleProgramFlow); + } + inline void setDenormMode(const DENORM_MODE value) { + TheStructure.Common.DenormMode = value; + } + inline DENORM_MODE getDenormMode(void) const { + return static_cast(TheStructure.Common.DenormMode); + } + inline void setThreadPreemptionDisable(const THREAD_PREEMPTION_DISABLE value) { + TheStructure.Common.ThreadPreemptionDisable = value; + } + inline THREAD_PREEMPTION_DISABLE getThreadPreemptionDisable(void) const { + return static_cast(TheStructure.Common.ThreadPreemptionDisable); + } + inline void setSamplerCount(const SAMPLER_COUNT value) { + TheStructure.Common.SamplerCount = value; + } + inline SAMPLER_COUNT getSamplerCount(void) const { + return static_cast(TheStructure.Common.SamplerCount); + } + typedef enum tagSAMPLERSTATEPOINTER { + SAMPLERSTATEPOINTER_BIT_SHIFT = 0x5, + SAMPLERSTATEPOINTER_ALIGN_SIZE = 0x20, + } SAMPLERSTATEPOINTER; + inline void setSamplerStatePointer(const uint64_t value) { + DEBUG_BREAK_IF(value >= 0x100000000); + TheStructure.Common.SamplerStatePointer = (uint32_t)value >> SAMPLERSTATEPOINTER_BIT_SHIFT; + } + inline uint32_t getSamplerStatePointer(void) const { + return (TheStructure.Common.SamplerStatePointer << SAMPLERSTATEPOINTER_BIT_SHIFT); + } + inline void setBindingTableEntryCount(const uint32_t value) { + TheStructure.Common.BindingTableEntryCount = value; + } + inline uint32_t getBindingTableEntryCount(void) const { + return (TheStructure.Common.BindingTableEntryCount); + } + typedef enum tagBINDINGTABLEPOINTER { + BINDINGTABLEPOINTER_BIT_SHIFT = 0x5, + BINDINGTABLEPOINTER_ALIGN_SIZE = 0x20, + } BINDINGTABLEPOINTER; + inline void 
setBindingTablePointer(const uint64_t value) { + DEBUG_BREAK_IF(value >= 0x100000000); + TheStructure.Common.BindingTablePointer = (uint32_t)value >> BINDINGTABLEPOINTER_BIT_SHIFT; + } + inline uint32_t getBindingTablePointer(void) const { + return (TheStructure.Common.BindingTablePointer << BINDINGTABLEPOINTER_BIT_SHIFT); + } + inline void setNumberOfThreadsInGpgpuThreadGroup(const uint32_t value) { + TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup = value; + } + inline uint32_t getNumberOfThreadsInGpgpuThreadGroup(void) const { + return (TheStructure.Common.NumberOfThreadsInGpgpuThreadGroup); + } + inline void setSharedLocalMemorySize(const SHARED_LOCAL_MEMORY_SIZE value) { + TheStructure.Common.SharedLocalMemorySize = value; + } + inline SHARED_LOCAL_MEMORY_SIZE getSharedLocalMemorySize(void) const { + return static_cast(TheStructure.Common.SharedLocalMemorySize); + } + inline void setBarrierEnable(const uint32_t value) { + TheStructure.Common.BarrierEnable = (value > 0u) ? 1u : 0u; + } + inline bool getBarrierEnable(void) const { + return (TheStructure.Common.BarrierEnable); + } + inline void setRoundingMode(const ROUNDING_MODE value) { + TheStructure.Common.RoundingMode = value; + } + inline ROUNDING_MODE getRoundingMode(void) const { + return static_cast(TheStructure.Common.RoundingMode); + } + inline void setThreadGroupDispatchSize(const uint32_t value) { + TheStructure.Common.ThreadGroupDispatchSize = value; + } + inline uint32_t getThreadGroupDispatchSize(void) const { + return (TheStructure.Common.ThreadGroupDispatchSize); + } +} INTERFACE_DESCRIPTOR_DATA; +STATIC_ASSERT(32 == sizeof(INTERFACE_DESCRIPTOR_DATA)); + +typedef struct tagINLINE_DATA { + uint32_t RawData[8]; +} INLINE_DATA; +STATIC_ASSERT(32 == sizeof(INLINE_DATA)); + +typedef struct tagCOMPUTE_WALKER { + union tagTheStructure { + struct tagCommon { + // DWORD 0 + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t PredicateEnable : BITFIELD_RANGE(8, 8); + uint32_t 
WorkloadPartitionEnable : BITFIELD_RANGE(9, 9); + uint32_t IndirectParameterEnable : BITFIELD_RANGE(10, 10); + uint32_t UavWaitToProduce : BITFIELD_RANGE(11, 11); + uint32_t UavProducer : BITFIELD_RANGE(12, 12); + uint32_t UavConsumer : BITFIELD_RANGE(13, 13); + uint32_t SystolicModeEnable : BITFIELD_RANGE(14, 14); + uint32_t Reserved_15 : BITFIELD_RANGE(15, 15); + uint32_t CfeSubopcodeVariant : BITFIELD_RANGE(16, 17); + uint32_t CfeSubopcode : BITFIELD_RANGE(18, 23); + uint32_t ComputeCommandOpcode : BITFIELD_RANGE(24, 26); + uint32_t Pipeline : BITFIELD_RANGE(27, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + // DWORD 1 + uint32_t Reserved_32 : BITFIELD_RANGE(0, 7); + uint32_t Reserved_40 : BITFIELD_RANGE(8, 31); + // DWORD 2 + uint32_t IndirectDataLength : BITFIELD_RANGE(0, 16); + uint32_t Reserved_82 : BITFIELD_RANGE(17, 29); + uint32_t PartitionType : BITFIELD_RANGE(30, 31); + // DWORD 3 + uint32_t Reserved_96 : BITFIELD_RANGE(0, 5); + uint32_t IndirectDataStartAddress : BITFIELD_RANGE(6, 31); + // DWORD 4 + uint32_t Reserved_128 : BITFIELD_RANGE(0, 16); + uint32_t MessageSimd : BITFIELD_RANGE(17, 18); + uint32_t TileLayout : BITFIELD_RANGE(19, 21); + uint32_t WalkOrder : BITFIELD_RANGE(22, 24); + uint32_t EmitInlineParameter : BITFIELD_RANGE(25, 25); + uint32_t EmitLocalId : BITFIELD_RANGE(26, 28); + uint32_t GenerateLocalId : BITFIELD_RANGE(29, 29); + uint32_t SimdSize : BITFIELD_RANGE(30, 31); + // DWORD 5 + uint32_t ExecutionMask; + // DWORD 6 + uint32_t LocalXMaximum : BITFIELD_RANGE(0, 9); + uint32_t LocalYMaximum : BITFIELD_RANGE(10, 19); + uint32_t LocalZMaximum : BITFIELD_RANGE(20, 29); + uint32_t Reserved_222 : BITFIELD_RANGE(30, 31); + // DWORD 7 + uint32_t ThreadGroupIdXDimension; + // DWORD 8 + uint32_t ThreadGroupIdYDimension; + // DWORD 9 + uint32_t ThreadGroupIdZDimension; + // DWORD 10 + uint32_t ThreadGroupIdStartingX; + // DWORD 11 + uint32_t ThreadGroupIdStartingY; + // DWORD 12 + uint32_t ThreadGroupIdStartingZ; + // DWORD 13 + 
uint64_t PartitionId : BITFIELD_RANGE(0, 31); + // DWORD 14 + uint64_t PartitionSize : BITFIELD_RANGE(32, 63); + // DWORD 15 + uint32_t PreemptX; + // DWORD 16 + uint32_t PreemptY; + // DWORD 17 + uint32_t PreemptZ; + // DWORD 18 + INTERFACE_DESCRIPTOR_DATA InterfaceDescriptor; + // DWORD 26 + POSTSYNC_DATA PostSync; + // DWORD 31 + INLINE_DATA InlineData; + } Common; + uint32_t RawData[39]; // raw DWORD view of the whole command: 39 DWORDs, matching the getRawData() bound and the 156-byte size assert + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_FIXED_SIZE = 0x25, + } DWORD_LENGTH; + typedef enum tagCFE_SUBOPCODE_VARIANT { + CFE_SUBOPCODE_VARIANT_STANDARD = 0x0, + } CFE_SUBOPCODE_VARIANT; + typedef enum tagCFE_SUBOPCODE { + CFE_SUBOPCODE_COMPUTE_WALKER = 0x2, + } CFE_SUBOPCODE; + typedef enum tagCOMPUTE_COMMAND_OPCODE { + COMPUTE_COMMAND_OPCODE_NEW_CFE_COMMAND = 0x2, + } COMPUTE_COMMAND_OPCODE; + typedef enum tagPIPELINE { + PIPELINE_COMPUTE = 0x2, + } PIPELINE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_GFXPIPE = 0x3, + } COMMAND_TYPE; + typedef enum tagPARTITION_TYPE { + PARTITION_TYPE_DISABLED = 0x0, + PARTITION_TYPE_X = 0x1, + PARTITION_TYPE_Y = 0x2, + PARTITION_TYPE_Z = 0x3, + } PARTITION_TYPE; + typedef enum tagSIMD_SIZE { + SIMD_SIZE_SIMD8 = 0x0, + SIMD_SIZE_SIMD16 = 0x1, + SIMD_SIZE_SIMD32 = 0x2, + } SIMD_SIZE; + typedef enum tagPARTITION_ID { + PARTITION_ID_SUPPORTED_MIN = 0x0, + PARTITION_ID_SUPPORTED_MAX = 0xf, + } PARTITION_ID; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_FIXED_SIZE; + TheStructure.Common.CfeSubopcodeVariant = CFE_SUBOPCODE_VARIANT_STANDARD; + TheStructure.Common.CfeSubopcode = CFE_SUBOPCODE_COMPUTE_WALKER; + TheStructure.Common.ComputeCommandOpcode = COMPUTE_COMMAND_OPCODE_NEW_CFE_COMMAND; + TheStructure.Common.Pipeline = PIPELINE_COMPUTE; + TheStructure.Common.CommandType = COMMAND_TYPE_GFXPIPE; + TheStructure.Common.PartitionType = PARTITION_TYPE_DISABLED; + TheStructure.Common.SimdSize = SIMD_SIZE_SIMD8; +
TheStructure.Common.InterfaceDescriptor.init(); + TheStructure.Common.PostSync.init(); + } + static tagCOMPUTE_WALKER sInit(void) { + COMPUTE_WALKER state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + UNRECOVERABLE_IF(index >= 39); + return TheStructure.RawData[index]; + } + inline void setDwordLength(const DWORD_LENGTH value) { + TheStructure.Common.DwordLength = value; + } + inline DWORD_LENGTH getDwordLength(void) const { + return static_cast(TheStructure.Common.DwordLength); + } + inline void setPredicateEnable(const bool value) { + TheStructure.Common.PredicateEnable = value; + } + inline bool getPredicateEnable(void) const { + return TheStructure.Common.PredicateEnable; + } + inline void setWorkloadPartitionEnable(const bool value) { + TheStructure.Common.WorkloadPartitionEnable = value; + } + inline bool getWorkloadPartitionEnable(void) const { + return TheStructure.Common.WorkloadPartitionEnable; + } + inline void setIndirectParameterEnable(const bool value) { + TheStructure.Common.IndirectParameterEnable = value; + } + inline bool getIndirectParameterEnable(void) const { + return TheStructure.Common.IndirectParameterEnable; + } + inline void setUavWaitToProduce(const bool value) { + TheStructure.Common.UavWaitToProduce = value; + } + inline bool getUavWaitToProduce(void) const { + return TheStructure.Common.UavWaitToProduce; + } + inline void setUavProducer(const bool value) { + TheStructure.Common.UavProducer = value; + } + inline bool getUavProducer(void) const { + return TheStructure.Common.UavProducer; + } + inline void setUavConsumer(const bool value) { + TheStructure.Common.UavConsumer = value; + } + inline bool getUavConsumer(void) const { + return TheStructure.Common.UavConsumer; + } + inline void setSystolicModeEnable(const bool value) { + TheStructure.Common.SystolicModeEnable = value; + } + inline bool getSystolicModeEnable(void) const { + return TheStructure.Common.SystolicModeEnable; + } + inline 
void setCfeSubopcodeVariant(const CFE_SUBOPCODE_VARIANT value) { + TheStructure.Common.CfeSubopcodeVariant = value; + } + inline CFE_SUBOPCODE_VARIANT getCfeSubopcodeVariant(void) const { + return static_cast(TheStructure.Common.CfeSubopcodeVariant); + } + inline void setCfeSubopcode(const CFE_SUBOPCODE value) { + TheStructure.Common.CfeSubopcode = value; + } + inline CFE_SUBOPCODE getCfeSubopcode(void) const { + return static_cast(TheStructure.Common.CfeSubopcode); + } + inline void setComputeCommandOpcode(const COMPUTE_COMMAND_OPCODE value) { + TheStructure.Common.ComputeCommandOpcode = value; + } + inline COMPUTE_COMMAND_OPCODE getComputeCommandOpcode(void) const { + return static_cast(TheStructure.Common.ComputeCommandOpcode); + } + inline void setIndirectDataLength(const uint32_t value) { + UNRECOVERABLE_IF(value > 0x1ffff); + TheStructure.Common.IndirectDataLength = value; + } + inline uint32_t getIndirectDataLength(void) const { + return TheStructure.Common.IndirectDataLength; + } + inline void setPartitionType(const PARTITION_TYPE value) { + TheStructure.Common.PartitionType = value; + } + inline PARTITION_TYPE getPartitionType(void) const { + return static_cast(TheStructure.Common.PartitionType); + } + typedef enum tagINDIRECTDATASTARTADDRESS { + INDIRECTDATASTARTADDRESS_BIT_SHIFT = 0x6, + INDIRECTDATASTARTADDRESS_ALIGN_SIZE = 0x40, + } INDIRECTDATASTARTADDRESS; + inline void setIndirectDataStartAddress(const uint32_t value) { + UNRECOVERABLE_IF(value > 0xffffffc0); + TheStructure.Common.IndirectDataStartAddress = value >> INDIRECTDATASTARTADDRESS_BIT_SHIFT; + } + inline uint32_t getIndirectDataStartAddress(void) const { + return TheStructure.Common.IndirectDataStartAddress << INDIRECTDATASTARTADDRESS_BIT_SHIFT; + } + inline void setMessageSimd(const uint32_t value) { + TheStructure.Common.MessageSimd = value; + } + inline uint32_t getMessageSimd(void) const { + return (TheStructure.Common.MessageSimd); + } + inline void setTileLayout(const uint32_t value) 
{ + TheStructure.Common.TileLayout = value; + } + inline uint32_t getTileLayout(void) const { + return (TheStructure.Common.TileLayout); + } + inline void setWalkOrder(const uint32_t value) { + TheStructure.Common.WalkOrder = value; + } + inline uint32_t getWalkOrder(void) const { + return (TheStructure.Common.WalkOrder); + } + inline void setEmitInlineParameter(const uint32_t value) { + TheStructure.Common.EmitInlineParameter = value; + } + inline uint32_t getEmitInlineParameter(void) const { + return (TheStructure.Common.EmitInlineParameter); + } + inline void setEmitLocalId(const uint32_t value) { + TheStructure.Common.EmitLocalId = value; + } + inline uint32_t getEmitLocalId(void) const { + return (TheStructure.Common.EmitLocalId); + } + inline void setGenerateLocalId(const uint32_t value) { + TheStructure.Common.GenerateLocalId = value; + } + inline uint32_t getGenerateLocalId(void) const { + return (TheStructure.Common.GenerateLocalId); + } + inline void setSimdSize(const SIMD_SIZE value) { + TheStructure.Common.SimdSize = value; + } + inline SIMD_SIZE getSimdSize(void) const { + return static_cast(TheStructure.Common.SimdSize); + } + inline void setExecutionMask(const uint32_t value) { + TheStructure.Common.ExecutionMask = value; + } + inline uint32_t getExecutionMask(void) const { + return TheStructure.Common.ExecutionMask; + } + inline void setLocalXMaximum(const uint32_t value) { + TheStructure.Common.LocalXMaximum = value; + } + inline uint32_t getLocalXMaximum(void) const { + return (TheStructure.Common.LocalXMaximum); + } + inline void setLocalYMaximum(const uint32_t value) { + TheStructure.Common.LocalYMaximum = value; + } + inline uint32_t getLocalYMaximum(void) const { + return (TheStructure.Common.LocalYMaximum); + } + inline void setLocalZMaximum(const uint32_t value) { + TheStructure.Common.LocalZMaximum = value; + } + inline uint32_t getLocalZMaximum(void) const { + return (TheStructure.Common.LocalZMaximum); + } + inline void 
setThreadGroupIdXDimension(const uint32_t value) { + TheStructure.Common.ThreadGroupIdXDimension = value; + } + inline uint32_t getThreadGroupIdXDimension(void) const { + return TheStructure.Common.ThreadGroupIdXDimension; + } + inline void setThreadGroupIdYDimension(const uint32_t value) { + TheStructure.Common.ThreadGroupIdYDimension = value; + } + inline uint32_t getThreadGroupIdYDimension(void) const { + return TheStructure.Common.ThreadGroupIdYDimension; + } + inline void setThreadGroupIdZDimension(const uint32_t value) { + TheStructure.Common.ThreadGroupIdZDimension = value; + } + inline uint32_t getThreadGroupIdZDimension(void) const { + return TheStructure.Common.ThreadGroupIdZDimension; + } + inline void setThreadGroupIdStartingX(const uint32_t value) { + TheStructure.Common.ThreadGroupIdStartingX = value; + } + inline uint32_t getThreadGroupIdStartingX(void) const { + return TheStructure.Common.ThreadGroupIdStartingX; + } + inline void setThreadGroupIdStartingY(const uint32_t value) { + TheStructure.Common.ThreadGroupIdStartingY = value; + } + inline uint32_t getThreadGroupIdStartingY(void) const { + return TheStructure.Common.ThreadGroupIdStartingY; + } + inline void setThreadGroupIdStartingZ(const uint32_t value) { + TheStructure.Common.ThreadGroupIdStartingZ = value; + } + inline uint32_t getThreadGroupIdStartingZ(void) const { + return TheStructure.Common.ThreadGroupIdStartingZ; + } + inline void setPartitionId(const uint64_t value) { + TheStructure.Common.PartitionId = value; + } + inline uint64_t getPartitionId(void) const { + return TheStructure.Common.PartitionId; + } + inline void setPartitionSize(const uint64_t value) { + TheStructure.Common.PartitionSize = value; + } + inline uint64_t getPartitionSize(void) const { + return TheStructure.Common.PartitionSize; + } + inline void setPreemptX(const uint32_t value) { + TheStructure.Common.PreemptX = value; + } + inline uint32_t getPreemptX(void) const { + return TheStructure.Common.PreemptX; + } + 
inline void setPreemptY(const uint32_t value) { + TheStructure.Common.PreemptY = value; + } + inline uint32_t getPreemptY(void) const { + return TheStructure.Common.PreemptY; + } + inline void setPreemptZ(const uint32_t value) { + TheStructure.Common.PreemptZ = value; + } + inline uint32_t getPreemptZ(void) const { + return TheStructure.Common.PreemptZ; + } + inline void setInterfaceDescriptor(const INTERFACE_DESCRIPTOR_DATA &value) { + TheStructure.Common.InterfaceDescriptor = value; + } + inline INTERFACE_DESCRIPTOR_DATA &getInterfaceDescriptor(void) { + return TheStructure.Common.InterfaceDescriptor; + } + inline void setPostSync(const POSTSYNC_DATA &value) { + TheStructure.Common.PostSync = value; + } + inline POSTSYNC_DATA &getPostSync(void) { + return TheStructure.Common.PostSync; + } + inline uint32_t *getInlineDataPointer() { + return reinterpret_cast(&TheStructure.Common.InlineData); + } +} COMPUTE_WALKER; +STATIC_ASSERT(156 == sizeof(COMPUTE_WALKER)); + +typedef struct tagCFE_STATE { + union tagTheStructure { + struct tagCommon { + // DWORD 0 + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t Reserved_8 : BITFIELD_RANGE(8, 15); + uint32_t CfeSubopcodeVariant : BITFIELD_RANGE(16, 17); + uint32_t CfeSubopcode : BITFIELD_RANGE(18, 23); + uint32_t ComputeCommandOpcode : BITFIELD_RANGE(24, 26); + uint32_t Pipeline : BITFIELD_RANGE(27, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + // DWORD 1 + uint64_t Reserved_32 : BITFIELD_RANGE(0, 7); + uint64_t Reserved_40 : BITFIELD_RANGE(8, 9); + uint64_t ScratchSpaceBuffer : BITFIELD_RANGE(10, 31); + // DWORD 2 + uint64_t Reserved_64 : BITFIELD_RANGE(32, 63); + // DWORD 3 + uint32_t Reserved_96 : BITFIELD_RANGE(0, 2); + uint32_t NumberOfWalkers : BITFIELD_RANGE(3, 5); + uint32_t FusedEuDispatch : BITFIELD_RANGE(6, 6); + uint32_t Reserved_103 : BITFIELD_RANGE(7, 9); + uint32_t LargeGRFThreadAdjustDisable : BITFIELD_RANGE(10, 10); + uint32_t ComputeOverdispatchDisable : BITFIELD_RANGE(11, 11); + uint32_t 
WeightedDispatchModeDisable : BITFIELD_RANGE(12, 12); + uint32_t SingleSliceDispatchCcsMode : BITFIELD_RANGE(13, 13); + uint32_t OverDispatchControl : BITFIELD_RANGE(14, 15); + uint32_t MaximumNumberOfThreads : BITFIELD_RANGE(16, 31); + // DWORD 4 + uint32_t Reserved_128; + // DWORD 5 + uint32_t Reserved_160 : BITFIELD_RANGE(0, 0); + uint32_t Reserved_161 : BITFIELD_RANGE(1, 10); + uint32_t Reserved_171 : BITFIELD_RANGE(11, 31); + } Common; + uint32_t RawData[6]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_DWORD_COUNT_N = 0x4, + } DWORD_LENGTH; + typedef enum tagCFE_SUBOPCODE_VARIANT { + CFE_SUBOPCODE_VARIANT_STANDARD = 0x0, + } CFE_SUBOPCODE_VARIANT; + typedef enum tagCFE_SUBOPCODE { + CFE_SUBOPCODE_CFE_STATE = 0x0, + } CFE_SUBOPCODE; + typedef enum tagCOMPUTE_COMMAND_OPCODE { + COMPUTE_COMMAND_OPCODE_NEW_CFE_COMMAND = 0x2, + } COMPUTE_COMMAND_OPCODE; + typedef enum tagPIPELINE { + PIPELINE_COMPUTE = 0x2, + } PIPELINE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_GFXPIPE = 0x3, + } COMMAND_TYPE; + typedef enum tagOVER_DISPATCH_CONTROL { + OVER_DISPATCH_CONTROL_NONE = 0x0, + OVER_DISPATCH_CONTROL_LOW = 0x1, + OVER_DISPATCH_CONTROL_NORMAL = 0x2, + OVER_DISPATCH_CONTROL_HIGH = 0x3, + } OVER_DISPATCH_CONTROL; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_DWORD_COUNT_N; + TheStructure.Common.CfeSubopcodeVariant = CFE_SUBOPCODE_VARIANT_STANDARD; + TheStructure.Common.CfeSubopcode = CFE_SUBOPCODE_CFE_STATE; + TheStructure.Common.ComputeCommandOpcode = COMPUTE_COMMAND_OPCODE_NEW_CFE_COMMAND; + TheStructure.Common.Pipeline = PIPELINE_COMPUTE; + TheStructure.Common.CommandType = COMMAND_TYPE_GFXPIPE; + TheStructure.Common.OverDispatchControl = OVER_DISPATCH_CONTROL_NORMAL; + } + static tagCFE_STATE sInit(void) { + CFE_STATE state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + UNRECOVERABLE_IF(index >= 6); + return 
TheStructure.RawData[index]; + } + inline void setCfeSubopcodeVariant(const CFE_SUBOPCODE_VARIANT value) { + TheStructure.Common.CfeSubopcodeVariant = value; + } + inline CFE_SUBOPCODE_VARIANT getCfeSubopcodeVariant(void) const { + return static_cast(TheStructure.Common.CfeSubopcodeVariant); + } + inline void setCfeSubopcode(const CFE_SUBOPCODE value) { + TheStructure.Common.CfeSubopcode = value; + } + inline CFE_SUBOPCODE getCfeSubopcode(void) const { + return static_cast(TheStructure.Common.CfeSubopcode); + } + inline void setComputeCommandOpcode(const COMPUTE_COMMAND_OPCODE value) { + TheStructure.Common.ComputeCommandOpcode = value; + } + inline COMPUTE_COMMAND_OPCODE getComputeCommandOpcode(void) const { + return static_cast(TheStructure.Common.ComputeCommandOpcode); + } + typedef enum tagSCRATCHSPACEBUFFER { + SCRATCHSPACEBUFFER_BIT_SHIFT = 0x6, + SCRATCHSPACEBUFFER_ALIGN_SIZE = 0x40, + } SCRATCHSPACEBUFFER; + inline void setScratchSpaceBuffer(const uint64_t value) { + UNRECOVERABLE_IF(value > 0xfffffc00L); + TheStructure.Common.ScratchSpaceBuffer = static_cast(value) >> SCRATCHSPACEBUFFER_BIT_SHIFT; + } + inline uint64_t getScratchSpaceBuffer(void) const { + return TheStructure.Common.ScratchSpaceBuffer << SCRATCHSPACEBUFFER_BIT_SHIFT; + } + inline void setNumberOfWalkers(const uint32_t value) { + UNRECOVERABLE_IF(value > 0x38); + TheStructure.Common.NumberOfWalkers = value - 1; + } + inline uint32_t getNumberOfWalkers(void) const { + return TheStructure.Common.NumberOfWalkers + 1; + } + inline void setFusedEuDispatch(const bool value) { + TheStructure.Common.FusedEuDispatch = value; + } + inline bool getFusedEuDispatch(void) const { + return TheStructure.Common.FusedEuDispatch; + } + inline void setLargeGRFThreadAdjustDisable(const bool value) { + TheStructure.Common.LargeGRFThreadAdjustDisable = value; + } + inline bool getLargeGRFThreadAdjustDisable() const { + return TheStructure.Common.LargeGRFThreadAdjustDisable; + } + inline void 
setComputeOverdispatchDisable(const bool value) { + TheStructure.Common.ComputeOverdispatchDisable = value; + } + inline bool getComputeOverdispatchDisable() const { + return TheStructure.Common.ComputeOverdispatchDisable; + } + inline void setWeightedDispatchModeDisable(const bool value) { + TheStructure.Common.WeightedDispatchModeDisable = value; + } + inline bool getWeightedDispatchModeDisable() const { + return TheStructure.Common.WeightedDispatchModeDisable; + } + inline void setSingleSliceDispatchCcsMode(const bool value) { + TheStructure.Common.SingleSliceDispatchCcsMode = value; + } + inline bool getSingleSliceDispatchCcsMode(void) const { + return TheStructure.Common.SingleSliceDispatchCcsMode; + } + inline void setOverDispatchControl(const OVER_DISPATCH_CONTROL value) { + TheStructure.Common.OverDispatchControl = value; + } + inline OVER_DISPATCH_CONTROL getOverDispatchControl(void) const { + return static_cast(TheStructure.Common.OverDispatchControl); + } + inline void setMaximumNumberOfThreads(const uint32_t value) { + UNRECOVERABLE_IF(value > 0xffff0000); + TheStructure.Common.MaximumNumberOfThreads = value - 1; + } + inline uint32_t getMaximumNumberOfThreads(void) const { + return TheStructure.Common.MaximumNumberOfThreads + 1; + } +} CFE_STATE; +STATIC_ASSERT(24 == sizeof(CFE_STATE)); + +typedef struct tagMI_ARB_CHECK { + union tagTheStructure { + struct tagCommon { + uint32_t Pre_FetchDisable : BITFIELD_RANGE(0, 0); + uint32_t Reserved_1 : BITFIELD_RANGE(1, 7); + uint32_t MaskBits : BITFIELD_RANGE(8, 15); + uint32_t Reserved_16 : BITFIELD_RANGE(16, 22); + uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + } Common; + uint32_t RawData[1]; + } TheStructure; + typedef enum tagMI_COMMAND_OPCODE { + MI_COMMAND_OPCODE_MI_ARB_CHECK = 0x5, + } MI_COMMAND_OPCODE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_MI_COMMAND = 0x0, + } COMMAND_TYPE; + inline void init(void) { + memset(&TheStructure, 0, 
sizeof(TheStructure)); + TheStructure.Common.MiCommandOpcode = MI_COMMAND_OPCODE_MI_ARB_CHECK; + TheStructure.Common.CommandType = COMMAND_TYPE_MI_COMMAND; + } + static tagMI_ARB_CHECK sInit(void) { + MI_ARB_CHECK state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 1); + return TheStructure.RawData[index]; + } + inline void setPreFetchDisable(const uint32_t value) { + TheStructure.Common.Pre_FetchDisable = value; + TheStructure.Common.MaskBits = 1 << 0; //PreFetchDisable is at bit0, so set bit0 of mask to 1 + } + inline uint32_t getPreFetchDisable(void) const { + return TheStructure.Common.Pre_FetchDisable; + } + inline void setMaskBits(const uint32_t value) { + DEBUG_BREAK_IF(value > 0xff00); + TheStructure.Common.MaskBits = value; + } + inline uint32_t getMaskBits(void) const { + return TheStructure.Common.MaskBits; + } +} MI_ARB_CHECK; +STATIC_ASSERT(4 == sizeof(MI_ARB_CHECK)); + +typedef struct tagMI_BATCH_BUFFER_START { + union tagTheStructure { + struct tagCommon { + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t AddressSpaceIndicator : BITFIELD_RANGE(8, 8); + uint32_t Reserved_9 : BITFIELD_RANGE(9, 9); + uint32_t ResourceStreamerEnable : BITFIELD_RANGE(10, 10); + uint32_t Reserved_11 : BITFIELD_RANGE(11, 14); + uint32_t PredicationEnable : BITFIELD_RANGE(15, 15); + uint32_t AddOffsetEnable : BITFIELD_RANGE(16, 16); + uint32_t Reserved_17 : BITFIELD_RANGE(17, 18); + uint32_t EnableCommandCache : BITFIELD_RANGE(19, 19); + uint32_t PoshEnable : BITFIELD_RANGE(20, 20); + uint32_t PoshStart : BITFIELD_RANGE(21, 21); + uint32_t SecondLevelBatchBuffer : BITFIELD_RANGE(22, 22); + uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + uint64_t Reserved_32 : BITFIELD_RANGE(0, 1); + uint64_t BatchBufferStartAddress : BITFIELD_RANGE(2, 47); + uint64_t BatchBufferStartAddress_Reserved_80 : BITFIELD_RANGE(48, 63); + } Common; + struct 
tagMi_Mode_Nestedbatchbufferenableis0 { + uint32_t Reserved_0 : BITFIELD_RANGE(0, 21); + uint32_t SecondLevelBatchBuffer : BITFIELD_RANGE(22, 22); + uint32_t Reserved_23 : BITFIELD_RANGE(23, 31); + uint64_t Reserved_32 : BITFIELD_RANGE(0, 47); + uint64_t Reserved_80 : BITFIELD_RANGE(48, 63); + } Mi_Mode_Nestedbatchbufferenableis0; + struct tagMi_Mode_Nestedbatchbufferenableis1 { + uint32_t Reserved_0 : BITFIELD_RANGE(0, 21); + uint32_t NestedLevelBatchBuffer : BITFIELD_RANGE(22, 22); + uint32_t Reserved_23 : BITFIELD_RANGE(23, 31); + uint64_t Reserved_32 : BITFIELD_RANGE(0, 47); + uint64_t Reserved_80 : BITFIELD_RANGE(48, 63); + } Mi_Mode_Nestedbatchbufferenableis1; + uint32_t RawData[3]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_EXCLUDES_DWORD_0_1 = 0x1, + } DWORD_LENGTH; + typedef enum tagADDRESS_SPACE_INDICATOR { + ADDRESS_SPACE_INDICATOR_GGTT = 0x0, + ADDRESS_SPACE_INDICATOR_PPGTT = 0x1, + } ADDRESS_SPACE_INDICATOR; + typedef enum tagNESTED_LEVEL_BATCH_BUFFER { + NESTED_LEVEL_BATCH_BUFFER_CHAIN = 0x0, + NESTED_LEVEL_BATCH_BUFFER_NESTED = 0x1, + } NESTED_LEVEL_BATCH_BUFFER; + typedef enum tagSECOND_LEVEL_BATCH_BUFFER { + SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH = 0x0, + SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH = 0x1, + } SECOND_LEVEL_BATCH_BUFFER; + typedef enum tagMI_COMMAND_OPCODE { + MI_COMMAND_OPCODE_MI_BATCH_BUFFER_START = 0x31, + } MI_COMMAND_OPCODE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_MI_COMMAND = 0x0, + } COMMAND_TYPE; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_EXCLUDES_DWORD_0_1; + TheStructure.Common.AddressSpaceIndicator = ADDRESS_SPACE_INDICATOR_PPGTT; + TheStructure.Common.SecondLevelBatchBuffer = SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH; + TheStructure.Common.MiCommandOpcode = MI_COMMAND_OPCODE_MI_BATCH_BUFFER_START; + TheStructure.Common.CommandType = COMMAND_TYPE_MI_COMMAND; + 
TheStructure.Mi_Mode_Nestedbatchbufferenableis1.NestedLevelBatchBuffer = NESTED_LEVEL_BATCH_BUFFER_CHAIN; + } + static tagMI_BATCH_BUFFER_START sInit(void) { + MI_BATCH_BUFFER_START state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 3); + return TheStructure.RawData[index]; + } + inline void setAddressSpaceIndicator(const ADDRESS_SPACE_INDICATOR value) { + TheStructure.Common.AddressSpaceIndicator = value; + } + inline ADDRESS_SPACE_INDICATOR getAddressSpaceIndicator(void) const { + return static_cast(TheStructure.Common.AddressSpaceIndicator); + } + inline void setResourceStreamerEnable(const bool value) { + TheStructure.Common.ResourceStreamerEnable = value; + } + inline bool getResourceStreamerEnable(void) const { + return TheStructure.Common.ResourceStreamerEnable; + } + inline void setPredicationEnable(const uint32_t value) { + TheStructure.Common.PredicationEnable = value; + } + inline uint32_t getPredicationEnable(void) const { + return TheStructure.Common.PredicationEnable; + } + inline void setAddOffsetEnable(const bool value) { + TheStructure.Common.AddOffsetEnable = value; + } + inline bool getAddOffsetEnable(void) const { + return TheStructure.Common.AddOffsetEnable; + } + inline void setEnableCommandCache(const uint32_t value) { + TheStructure.Common.EnableCommandCache = value; + } + inline uint32_t getEnableCommandCache(void) const { + return TheStructure.Common.EnableCommandCache; + } + inline void setPoshEnable(const uint32_t value) { + TheStructure.Common.PoshEnable = value; + } + inline uint32_t getPoshEnable(void) const { + return TheStructure.Common.PoshEnable; + } + inline void setPoshStart(const uint32_t value) { + TheStructure.Common.PoshStart = value; + } + inline uint32_t getPoshStart(void) const { + return TheStructure.Common.PoshStart; + } + inline void setSecondLevelBatchBuffer(const SECOND_LEVEL_BATCH_BUFFER value) { + TheStructure.Common.SecondLevelBatchBuffer = 
value; + } + inline SECOND_LEVEL_BATCH_BUFFER getSecondLevelBatchBuffer(void) const { + return static_cast(TheStructure.Common.SecondLevelBatchBuffer); + } + typedef enum tagBATCHBUFFERSTARTADDRESS { + BATCHBUFFERSTARTADDRESS_BIT_SHIFT = 0x2, + BATCHBUFFERSTARTADDRESS_ALIGN_SIZE = 0x4, + } BATCHBUFFERSTARTADDRESS; + inline void setBatchBufferStartAddress(const uint64_t value) { + TheStructure.Common.BatchBufferStartAddress = value >> BATCHBUFFERSTARTADDRESS_BIT_SHIFT; + } + inline void setBatchBufferStartAddressGraphicsaddress472(const uint64_t value) { + TheStructure.Common.BatchBufferStartAddress = value >> BATCHBUFFERSTARTADDRESS_BIT_SHIFT; + } + inline uint64_t getBatchBufferStartAddress(void) const { + return TheStructure.Common.BatchBufferStartAddress << BATCHBUFFERSTARTADDRESS_BIT_SHIFT; + } + inline uint64_t getBatchBufferStartAddressGraphicsaddress472(void) const { + return TheStructure.Common.BatchBufferStartAddress << BATCHBUFFERSTARTADDRESS_BIT_SHIFT; + } + inline void setNestedLevelBatchBuffer(const NESTED_LEVEL_BATCH_BUFFER value) { + TheStructure.Mi_Mode_Nestedbatchbufferenableis1.NestedLevelBatchBuffer = value; + } + inline NESTED_LEVEL_BATCH_BUFFER getNestedLevelBatchBuffer(void) const { + return static_cast(TheStructure.Mi_Mode_Nestedbatchbufferenableis1.NestedLevelBatchBuffer); + } +} MI_BATCH_BUFFER_START; +STATIC_ASSERT(12 == sizeof(MI_BATCH_BUFFER_START)); + +typedef struct tagMI_LOAD_REGISTER_MEM { + union tagTheStructure { + struct tagCommon { + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t MemoryObjectControlStateReserved_8 : BITFIELD_RANGE(8, 8); + uint32_t MemoryObjectControlStateIndexToMocsTables + : BITFIELD_RANGE(9, 14); + uint32_t MemoryObjectControlStateEnable : BITFIELD_RANGE(15, 15); + uint32_t VirtualEngineIdOffsetEnable : BITFIELD_RANGE(16, 16); + uint32_t MmioRemapEnable : BITFIELD_RANGE(17, 17); + uint32_t Reserved_18 : BITFIELD_RANGE(18, 18); + uint32_t AddCsMmioStartOffset : BITFIELD_RANGE(19, 19); + uint32_t 
Reserved_20 : BITFIELD_RANGE(20, 20); + uint32_t AsyncModeEnable : BITFIELD_RANGE(21, 21); + uint32_t UseGlobalGtt : BITFIELD_RANGE(22, 22); + uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + uint32_t Reserved_32 : BITFIELD_RANGE(0, 1); + uint32_t RegisterAddress : BITFIELD_RANGE(2, 22); + uint32_t Reserved_55 : BITFIELD_RANGE(23, 31); + uint64_t Reserved_64 : BITFIELD_RANGE(0, 1); + uint64_t MemoryAddress : BITFIELD_RANGE(2, 63); + } Common; + uint32_t RawData[4]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_EXCLUDES_DWORD_0_1 = 0x2, + } DWORD_LENGTH; + typedef enum tagMI_COMMAND_OPCODE { + MI_COMMAND_OPCODE_MI_LOAD_REGISTER_MEM = 0x29, + } MI_COMMAND_OPCODE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_MI_COMMAND = 0x0, + } COMMAND_TYPE; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_EXCLUDES_DWORD_0_1; + TheStructure.Common.MiCommandOpcode = + MI_COMMAND_OPCODE_MI_LOAD_REGISTER_MEM; + TheStructure.Common.CommandType = COMMAND_TYPE_MI_COMMAND; + } + static tagMI_LOAD_REGISTER_MEM sInit(void) { + MI_LOAD_REGISTER_MEM state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 4); + return TheStructure.RawData[index]; + } + inline void + setMemoryObjectControlStateIndexToMocsTables(const uint32_t value) { + DEBUG_BREAK_IF(value > 0x7e00); + TheStructure.Common.MemoryObjectControlStateIndexToMocsTables = value >> 1; + } + inline uint32_t getMemoryObjectControlStateIndexToMocsTables(void) const { + return (TheStructure.Common.MemoryObjectControlStateIndexToMocsTables << 1); + } + inline void setMemoryObjectControlStateEnable(const bool value) { + TheStructure.Common.MemoryObjectControlStateEnable = value; + } + inline bool getMemoryObjectControlStateEnable(void) const { + return TheStructure.Common.MemoryObjectControlStateEnable; + } + inline void 
setVirtualEngineIdOffsetEnable(const bool value) { + TheStructure.Common.VirtualEngineIdOffsetEnable = value; + } + inline bool getVirtualEngineIdOffsetEnable(void) const { + return TheStructure.Common.VirtualEngineIdOffsetEnable; + } + inline void setMmioRemapEnable(const bool value) { + TheStructure.Common.MmioRemapEnable = value; + } + inline bool getMmioRemapEnable(void) const { + return TheStructure.Common.MmioRemapEnable; + } + inline void setAddCsMmioStartOffset(const uint32_t value) { + TheStructure.Common.AddCsMmioStartOffset = value; + } + inline uint32_t getAddCsMmioStartOffset(void) const { + return TheStructure.Common.AddCsMmioStartOffset; + } + inline void setAsyncModeEnable(const bool value) { + TheStructure.Common.AsyncModeEnable = value; + } + inline bool getAsyncModeEnable(void) const { + return TheStructure.Common.AsyncModeEnable; + } + inline void setUseGlobalGtt(const bool value) { + TheStructure.Common.UseGlobalGtt = value; + } + inline bool getUseGlobalGtt(void) const { + return TheStructure.Common.UseGlobalGtt; + } + typedef enum tagREGISTERADDRESS { + REGISTERADDRESS_BIT_SHIFT = 0x2, + REGISTERADDRESS_ALIGN_SIZE = 0x4, + } REGISTERADDRESS; + inline void setRegisterAddress(const uint32_t value) { + DEBUG_BREAK_IF(value > 0x7ffffc); + TheStructure.Common.RegisterAddress = value >> REGISTERADDRESS_BIT_SHIFT; + } + inline uint32_t getRegisterAddress(void) const { + return TheStructure.Common.RegisterAddress << REGISTERADDRESS_BIT_SHIFT; + } + typedef enum tagMEMORYADDRESS { + MEMORYADDRESS_BIT_SHIFT = 0x2, + MEMORYADDRESS_ALIGN_SIZE = 0x4, + } MEMORYADDRESS; + inline void setMemoryAddress(const uint64_t value) { + DEBUG_BREAK_IF(value > 0xfffffffffffffffcL); + TheStructure.Common.MemoryAddress = value >> MEMORYADDRESS_BIT_SHIFT; + } + inline uint64_t getMemoryAddress(void) const { + return TheStructure.Common.MemoryAddress << MEMORYADDRESS_BIT_SHIFT; + } +} MI_LOAD_REGISTER_MEM; +STATIC_ASSERT(16 == sizeof(MI_LOAD_REGISTER_MEM)); + +typedef 
struct tagMI_LOAD_REGISTER_REG { + union tagTheStructure { + struct tagCommon { + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t Reserved_8 : BITFIELD_RANGE(8, 15); + uint32_t MmioRemapEnableSource : BITFIELD_RANGE(16, 16); + uint32_t MmioRemapEnableDestination : BITFIELD_RANGE(17, 17); + uint32_t AddCsMmioStartOffsetSource : BITFIELD_RANGE(18, 18); + uint32_t AddCsMmioStartOffsetDestination : BITFIELD_RANGE(19, 19); + uint32_t Reserved_20 : BITFIELD_RANGE(20, 22); + uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + uint32_t Reserved_32 : BITFIELD_RANGE(0, 1); + uint32_t SourceRegisterAddress : BITFIELD_RANGE(2, 22); + uint32_t Reserved_55 : BITFIELD_RANGE(23, 31); + uint32_t Reserved_64 : BITFIELD_RANGE(0, 1); + uint32_t DestinationRegisterAddress : BITFIELD_RANGE(2, 22); + uint32_t Reserved_87 : BITFIELD_RANGE(23, 31); + } Common; + uint32_t RawData[3]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_EXCLUDES_DWORD_0_1 = 0x1, + } DWORD_LENGTH; + typedef enum tagMI_COMMAND_OPCODE { + MI_COMMAND_OPCODE_MI_LOAD_REGISTER_REG = 0x2a, + } MI_COMMAND_OPCODE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_MI_COMMAND = 0x0, + } COMMAND_TYPE; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_EXCLUDES_DWORD_0_1; + TheStructure.Common.MiCommandOpcode = MI_COMMAND_OPCODE_MI_LOAD_REGISTER_REG; + TheStructure.Common.CommandType = COMMAND_TYPE_MI_COMMAND; + } + static tagMI_LOAD_REGISTER_REG sInit(void) { + MI_LOAD_REGISTER_REG state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 3); + return TheStructure.RawData[index]; + } + inline void setMmioRemapEnableSource(const bool value) { + TheStructure.Common.MmioRemapEnableSource = value; + } + inline bool getMmioRemapEnableSource(void) const { + return TheStructure.Common.MmioRemapEnableSource; + } + inline 
void setMmioRemapEnableDestination(const bool value) { + TheStructure.Common.MmioRemapEnableDestination = value; + } + inline bool getMmioRemapEnableDestination(void) const { + return TheStructure.Common.MmioRemapEnableDestination; + } + inline void setAddCsMmioStartOffsetSource(const uint32_t value) { + TheStructure.Common.AddCsMmioStartOffsetSource = value; + } + inline uint32_t getAddCsMmioStartOffsetSource(void) const { + return TheStructure.Common.AddCsMmioStartOffsetSource; + } + inline void setAddCsMmioStartOffsetDestination(const uint32_t value) { + TheStructure.Common.AddCsMmioStartOffsetDestination = value; + } + inline uint32_t getAddCsMmioStartOffsetDestination(void) const { + return TheStructure.Common.AddCsMmioStartOffsetDestination; + } + typedef enum tagSOURCEREGISTERADDRESS { + SOURCEREGISTERADDRESS_BIT_SHIFT = 0x2, + SOURCEREGISTERADDRESS_ALIGN_SIZE = 0x4, + } SOURCEREGISTERADDRESS; + inline void setSourceRegisterAddress(const uint32_t value) { + DEBUG_BREAK_IF(value > 0x7ffffc); + TheStructure.Common.SourceRegisterAddress = value >> SOURCEREGISTERADDRESS_BIT_SHIFT; + } + inline uint32_t getSourceRegisterAddress(void) const { + return TheStructure.Common.SourceRegisterAddress << SOURCEREGISTERADDRESS_BIT_SHIFT; + } + typedef enum tagDESTINATIONREGISTERADDRESS { + DESTINATIONREGISTERADDRESS_BIT_SHIFT = 0x2, + DESTINATIONREGISTERADDRESS_ALIGN_SIZE = 0x4, + } DESTINATIONREGISTERADDRESS; + inline void setDestinationRegisterAddress(const uint32_t value) { + DEBUG_BREAK_IF(value > 0x7ffffc); + TheStructure.Common.DestinationRegisterAddress = value >> DESTINATIONREGISTERADDRESS_BIT_SHIFT; + } + inline uint32_t getDestinationRegisterAddress(void) const { + return TheStructure.Common.DestinationRegisterAddress << DESTINATIONREGISTERADDRESS_BIT_SHIFT; + } +} MI_LOAD_REGISTER_REG; +STATIC_ASSERT(12 == sizeof(MI_LOAD_REGISTER_REG)); + +typedef struct tagMI_SEMAPHORE_WAIT { + union tagTheStructure { + struct tagCommon { + uint32_t DwordLength : 
BITFIELD_RANGE(0, 7); + uint32_t Reserved_8 : BITFIELD_RANGE(8, 11); + uint32_t CompareOperation : BITFIELD_RANGE(12, 14); + uint32_t WaitMode : BITFIELD_RANGE(15, 15); + uint32_t RegisterPollMode : BITFIELD_RANGE(16, 16); + uint32_t Reserved_17 : BITFIELD_RANGE(17, 17); + uint32_t WorkloadPartitionIdOffsetEnable : BITFIELD_RANGE(18, 18); + uint32_t Reserved_19 : BITFIELD_RANGE(19, 21); + uint32_t MemoryType : BITFIELD_RANGE(22, 22); + uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + uint32_t SemaphoreDataDword; + uint64_t Reserved_64 : BITFIELD_RANGE(0, 1); + uint64_t SemaphoreAddress : BITFIELD_RANGE(2, 63); + uint32_t Reserved_192 : BITFIELD_RANGE(0, 4); + uint32_t WaitTokenNumber : BITFIELD_RANGE(5, 9); + uint32_t Reserved_202 : BITFIELD_RANGE(10, 31); + } Common; + uint32_t RawData[5]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_EXCLUDES_DWORD_0_1 = 0x3, + } DWORD_LENGTH; + typedef enum tagCOMPARE_OPERATION { + COMPARE_OPERATION_SAD_GREATER_THAN_SDD = 0x0, + COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD = 0x1, + COMPARE_OPERATION_SAD_LESS_THAN_SDD = 0x2, + COMPARE_OPERATION_SAD_LESS_THAN_OR_EQUAL_SDD = 0x3, + COMPARE_OPERATION_SAD_EQUAL_SDD = 0x4, + COMPARE_OPERATION_SAD_NOT_EQUAL_SDD = 0x5, + } COMPARE_OPERATION; + typedef enum tagWAIT_MODE { + WAIT_MODE_SIGNAL_MODE = 0x0, + WAIT_MODE_POLLING_MODE = 0x1, + } WAIT_MODE; + typedef enum tagREGISTER_POLL_MODE { + REGISTER_POLL_MODE_MEMORY_POLL = 0x0, + REGISTER_POLL_MODE_REGISTER_POLL = 0x1, + } REGISTER_POLL_MODE; + typedef enum tagMEMORY_TYPE { + MEMORY_TYPE_PER_PROCESS_GRAPHICS_ADDRESS = 0x0, + MEMORY_TYPE_GLOBAL_GRAPHICS_ADDRESS = 0x1, + } MEMORY_TYPE; + typedef enum tagMI_COMMAND_OPCODE { + MI_COMMAND_OPCODE_MI_SEMAPHORE_WAIT = 0x1c, + } MI_COMMAND_OPCODE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_MI_COMMAND = 0x0, + } COMMAND_TYPE; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + 
TheStructure.Common.DwordLength = DWORD_LENGTH_EXCLUDES_DWORD_0_1; + TheStructure.Common.CompareOperation = + COMPARE_OPERATION_SAD_GREATER_THAN_SDD; + TheStructure.Common.WaitMode = WAIT_MODE_SIGNAL_MODE; + TheStructure.Common.RegisterPollMode = REGISTER_POLL_MODE_MEMORY_POLL; + TheStructure.Common.MemoryType = MEMORY_TYPE_PER_PROCESS_GRAPHICS_ADDRESS; + TheStructure.Common.MiCommandOpcode = MI_COMMAND_OPCODE_MI_SEMAPHORE_WAIT; + TheStructure.Common.CommandType = COMMAND_TYPE_MI_COMMAND; + } + static tagMI_SEMAPHORE_WAIT sInit(void) { + MI_SEMAPHORE_WAIT state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 6); + return TheStructure.RawData[index]; + } + inline void setCompareOperation(const COMPARE_OPERATION value) { + TheStructure.Common.CompareOperation = value; + } + inline COMPARE_OPERATION getCompareOperation(void) const { + return static_cast(TheStructure.Common.CompareOperation); + } + inline void setWaitMode(const WAIT_MODE value) { + TheStructure.Common.WaitMode = value; + } + inline WAIT_MODE getWaitMode(void) const { + return static_cast(TheStructure.Common.WaitMode); + } + inline void setRegisterPollMode(const REGISTER_POLL_MODE value) { + TheStructure.Common.RegisterPollMode = value; + } + inline REGISTER_POLL_MODE getRegisterPollMode(void) const { + return static_cast( + TheStructure.Common.RegisterPollMode); + } + inline void setWorkloadPartitionIdOffsetEnable(const bool value) { + TheStructure.Common.WorkloadPartitionIdOffsetEnable = value; + } + inline bool getWorkloadPartitionIdOffsetEnable(void) const { + return TheStructure.Common.WorkloadPartitionIdOffsetEnable; + } + inline void setMemoryType(const MEMORY_TYPE value) { + TheStructure.Common.MemoryType = value; + } + inline MEMORY_TYPE getMemoryType(void) const { + return static_cast(TheStructure.Common.MemoryType); + } + inline void setSemaphoreDataDword(const uint32_t value) { + TheStructure.Common.SemaphoreDataDword = 
value; + } + inline uint32_t getSemaphoreDataDword(void) const { + return TheStructure.Common.SemaphoreDataDword; + } + typedef enum tagSEMAPHOREADDRESS { + SEMAPHOREADDRESS_BIT_SHIFT = 0x2, + SEMAPHOREADDRESS_ALIGN_SIZE = 0x4, + } SEMAPHOREADDRESS; + inline void setSemaphoreGraphicsAddress(const uint64_t value) { + TheStructure.Common.SemaphoreAddress = value >> SEMAPHOREADDRESS_BIT_SHIFT; + } + inline uint64_t getSemaphoreGraphicsAddress(void) const { + return TheStructure.Common.SemaphoreAddress << SEMAPHOREADDRESS_BIT_SHIFT; + } + inline void setWaitTokenNumber(const uint32_t value) { + DEBUG_BREAK_IF(value > 0x3e0); + TheStructure.Common.WaitTokenNumber = value; + } + inline uint32_t getWaitTokenNumber(void) const { + return TheStructure.Common.WaitTokenNumber; + } +} MI_SEMAPHORE_WAIT; +STATIC_ASSERT(20 == sizeof(MI_SEMAPHORE_WAIT)); + +typedef struct tagMI_STORE_DATA_IMM { + union tagTheStructure { + struct tagCommon { + uint32_t DwordLength : BITFIELD_RANGE(0, 9); + uint32_t Reserved_10 : BITFIELD_RANGE(10, 10); + uint32_t WorkloadPartitionIdOffsetEnable : BITFIELD_RANGE(11, 11); + uint32_t Reserved_12 : BITFIELD_RANGE(12, 13); + uint32_t MemoryObjectControlStateIndexToMocsTables + : BITFIELD_RANGE(14, 19); + uint32_t MemoryObjectControlStateEnable : BITFIELD_RANGE(20, 20); + uint32_t StoreQword : BITFIELD_RANGE(21, 21); + uint32_t UseGlobalGtt : BITFIELD_RANGE(22, 22); + uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + uint64_t CoreModeEnable : BITFIELD_RANGE(0, 0); + uint64_t Reserved_33 : BITFIELD_RANGE(1, 1); + uint64_t Address : BITFIELD_RANGE(2, 47); + uint64_t AddressReserved_80 : BITFIELD_RANGE(48, 63); + uint32_t DataDword0; + uint32_t DataDword1; + } Common; + uint32_t RawData[5]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_STORE_DWORD = 0x2, + DWORD_LENGTH_STORE_QWORD = 0x3, + } DWORD_LENGTH; + typedef enum tagMI_COMMAND_OPCODE { + MI_COMMAND_OPCODE_MI_STORE_DATA_IMM = 
0x20, + } MI_COMMAND_OPCODE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_MI_COMMAND = 0x0, + } COMMAND_TYPE; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_STORE_DWORD; + TheStructure.Common.MiCommandOpcode = MI_COMMAND_OPCODE_MI_STORE_DATA_IMM; + TheStructure.Common.CommandType = COMMAND_TYPE_MI_COMMAND; + } + static tagMI_STORE_DATA_IMM sInit(void) { + MI_STORE_DATA_IMM state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 5); + return TheStructure.RawData[index]; + } + inline void setDwordLength(const DWORD_LENGTH value) { + TheStructure.Common.DwordLength = value; + } + inline DWORD_LENGTH getDwordLength(void) const { + return static_cast(TheStructure.Common.DwordLength); + } + inline void setWorkloadPartitionIdOffsetEnable(const bool value) { + TheStructure.Common.WorkloadPartitionIdOffsetEnable = value; + } + inline bool getWorkloadPartitionIdOffsetEnable(void) const { + return TheStructure.Common.WorkloadPartitionIdOffsetEnable; + } + inline void + setMemoryObjectControlStateIndexToMocsTables(const uint32_t value) { + DEBUG_BREAK_IF(value > 0xfc000); + TheStructure.Common.MemoryObjectControlStateIndexToMocsTables = value >> 1; + } + inline uint32_t getMemoryObjectControlStateIndexToMocsTables(void) const { + return (TheStructure.Common.MemoryObjectControlStateIndexToMocsTables << 1); + } + inline void setMemoryObjectControlStateEnable(const bool value) { + TheStructure.Common.MemoryObjectControlStateEnable = value; + } + inline bool getMemoryObjectControlStateEnable(void) const { + return TheStructure.Common.MemoryObjectControlStateEnable; + } + inline void setStoreQword(const bool value) { + TheStructure.Common.StoreQword = value; + } + inline bool getStoreQword(void) const { + return TheStructure.Common.StoreQword; + } + inline void setUseGlobalGtt(const bool value) { + TheStructure.Common.UseGlobalGtt = 
value; + } + inline bool getUseGlobalGtt(void) const { + return TheStructure.Common.UseGlobalGtt; + } + inline void setCoreModeEnable(const uint64_t value) { + TheStructure.Common.CoreModeEnable = value; + } + inline uint64_t getCoreModeEnable(void) const { + return TheStructure.Common.CoreModeEnable; + } + typedef enum tagADDRESS { + ADDRESS_BIT_SHIFT = 0x2, + ADDRESS_ALIGN_SIZE = 0x4, + } ADDRESS; + inline void setAddress(const uint64_t value) { + DEBUG_BREAK_IF(value > 0xfffffffffffcL); + TheStructure.Common.Address = value >> ADDRESS_BIT_SHIFT; + } + inline uint64_t getAddress(void) const { + return TheStructure.Common.Address << ADDRESS_BIT_SHIFT; + } + inline void setDataDword0(const uint32_t value) { + TheStructure.Common.DataDword0 = value; + } + inline uint32_t getDataDword0(void) const { + return TheStructure.Common.DataDword0; + } + inline void setDataDword1(const uint32_t value) { + TheStructure.Common.DataDword1 = value; + } + inline uint32_t getDataDword1(void) const { + return TheStructure.Common.DataDword1; + } +} MI_STORE_DATA_IMM; +STATIC_ASSERT(20 == sizeof(MI_STORE_DATA_IMM)); + +typedef struct tagMI_STORE_REGISTER_MEM { + union tagTheStructure { + struct tagCommon { + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t MemoryObjectControlStateReserved_8 : BITFIELD_RANGE(8, 8); + uint32_t MemoryObjectControlStateIndexToMocsTables + : BITFIELD_RANGE(9, 14); + uint32_t MemoryObjectControlStateEnable : BITFIELD_RANGE(15, 15); + uint32_t WorkloadPartitionIdOffsetEnable : BITFIELD_RANGE(16, 16); + uint32_t MmioRemapEnable : BITFIELD_RANGE(17, 17); + uint32_t Reserved_18 : BITFIELD_RANGE(18, 18); + uint32_t AddCsMmioStartOffset : BITFIELD_RANGE(19, 19); + uint32_t Reserved_20 : BITFIELD_RANGE(20, 20); + uint32_t PredicateEnable : BITFIELD_RANGE(21, 21); + uint32_t UseGlobalGtt : BITFIELD_RANGE(22, 22); + uint32_t MiCommandOpcode : BITFIELD_RANGE(23, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + uint32_t Reserved_32 : BITFIELD_RANGE(0, 
1); + uint32_t RegisterAddress : BITFIELD_RANGE(2, 22); + uint32_t Reserved_55 : BITFIELD_RANGE(23, 31); + uint64_t Reserved_64 : BITFIELD_RANGE(0, 1); + uint64_t MemoryAddress : BITFIELD_RANGE(2, 63); + } Common; + uint32_t RawData[4]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_EXCLUDES_DWORD_0_1 = 0x2, + } DWORD_LENGTH; + typedef enum tagMI_COMMAND_OPCODE { + MI_COMMAND_OPCODE_MI_STORE_REGISTER_MEM = 0x24, + } MI_COMMAND_OPCODE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_MI_COMMAND = 0x0, + } COMMAND_TYPE; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_EXCLUDES_DWORD_0_1; + TheStructure.Common.MiCommandOpcode = + MI_COMMAND_OPCODE_MI_STORE_REGISTER_MEM; + TheStructure.Common.CommandType = COMMAND_TYPE_MI_COMMAND; + } + static tagMI_STORE_REGISTER_MEM sInit(void) { + MI_STORE_REGISTER_MEM state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 4); + return TheStructure.RawData[index]; + } + inline void + setMemoryObjectControlStateIndexToMocsTables(const uint32_t value) { + DEBUG_BREAK_IF(value > 0x7e00); + TheStructure.Common.MemoryObjectControlStateIndexToMocsTables = value >> 1; + } + inline uint32_t getMemoryObjectControlStateIndexToMocsTables(void) const { + return (TheStructure.Common.MemoryObjectControlStateIndexToMocsTables << 1); + } + inline void setMemoryObjectControlStateEnable(const bool value) { + TheStructure.Common.MemoryObjectControlStateEnable = value; + } + inline bool getMemoryObjectControlStateEnable(void) const { + return TheStructure.Common.MemoryObjectControlStateEnable; + } + inline void setWorkloadPartitionIdOffsetEnable(const bool value) { + TheStructure.Common.WorkloadPartitionIdOffsetEnable = value; + } + inline bool getWorkloadPartitionIdOffsetEnable(void) const { + return TheStructure.Common.WorkloadPartitionIdOffsetEnable; + } + inline void 
setMmioRemapEnable(const bool value) { + TheStructure.Common.MmioRemapEnable = value; + } + inline bool getMmioRemapEnable(void) const { + return TheStructure.Common.MmioRemapEnable; + } + inline void setAddCsMmioStartOffset(const uint32_t value) { + TheStructure.Common.AddCsMmioStartOffset = value; + } + inline uint32_t getAddCsMmioStartOffset(void) const { + return TheStructure.Common.AddCsMmioStartOffset; + } + inline void setPredicateEnable(const bool value) { + TheStructure.Common.PredicateEnable = value; + } + inline bool getPredicateEnable(void) const { + return TheStructure.Common.PredicateEnable; + } + inline void setUseGlobalGtt(const bool value) { + TheStructure.Common.UseGlobalGtt = value; + } + inline bool getUseGlobalGtt(void) const { + return TheStructure.Common.UseGlobalGtt; + } + typedef enum tagREGISTERADDRESS { + REGISTERADDRESS_BIT_SHIFT = 0x2, + REGISTERADDRESS_ALIGN_SIZE = 0x4, + } REGISTERADDRESS; + inline void setRegisterAddress(const uint32_t value) { + DEBUG_BREAK_IF(value > 0x7ffffc); + TheStructure.Common.RegisterAddress = value >> REGISTERADDRESS_BIT_SHIFT; + } + inline uint32_t getRegisterAddress(void) const { + return TheStructure.Common.RegisterAddress << REGISTERADDRESS_BIT_SHIFT; + } + typedef enum tagMEMORYADDRESS { + MEMORYADDRESS_BIT_SHIFT = 0x2, + MEMORYADDRESS_ALIGN_SIZE = 0x4, + } MEMORYADDRESS; + inline void setMemoryAddress(const uint64_t value) { + DEBUG_BREAK_IF(value > 0xfffffffffffffffcL); + TheStructure.Common.MemoryAddress = value >> MEMORYADDRESS_BIT_SHIFT; + } + inline uint64_t getMemoryAddress(void) const { + return TheStructure.Common.MemoryAddress << MEMORYADDRESS_BIT_SHIFT; + } +} MI_STORE_REGISTER_MEM; +STATIC_ASSERT(16 == sizeof(MI_STORE_REGISTER_MEM)); + +typedef struct tagPIPELINE_SELECT { + union tagTheStructure { + struct tagCommon { + uint32_t PipelineSelection : BITFIELD_RANGE(0, 1); + uint32_t RenderSliceCommonPowerGateEnable : BITFIELD_RANGE(2, 2); + uint32_t RenderSamplerPowerGateEnable : 
BITFIELD_RANGE(3, 3); + uint32_t MediaSamplerDopClockGateEnable : BITFIELD_RANGE(4, 4); + uint32_t Reserved_5 : BITFIELD_RANGE(5, 5); + uint32_t MediaSamplerPowerClockGateDisable : BITFIELD_RANGE(6, 6); + uint32_t SystolicModeEnable : BITFIELD_RANGE(7, 7); + uint32_t MaskBits : BITFIELD_RANGE(8, 15); + uint32_t _3DCommandSubOpcode : BITFIELD_RANGE(16, 23); + uint32_t _3DCommandOpcode : BITFIELD_RANGE(24, 26); + uint32_t CommandSubtype : BITFIELD_RANGE(27, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + } Common; + uint32_t RawData[1]; + } TheStructure; + typedef enum tagPIPELINE_SELECTION { + PIPELINE_SELECTION_3D = 0x0, + PIPELINE_SELECTION_MEDIA = 0x1, + PIPELINE_SELECTION_GPGPU = 0x2, + } PIPELINE_SELECTION; + typedef enum tag_3D_COMMAND_SUB_OPCODE { + _3D_COMMAND_SUB_OPCODE_PIPELINE_SELECT = 0x4, + } _3D_COMMAND_SUB_OPCODE; + typedef enum tag_3D_COMMAND_OPCODE { + _3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED = 0x1, + } _3D_COMMAND_OPCODE; + typedef enum tagCOMMAND_SUBTYPE { + COMMAND_SUBTYPE_GFXPIPE_SINGLE_DW = 0x1, + } COMMAND_SUBTYPE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_GFXPIPE = 0x3, + } COMMAND_TYPE; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.PipelineSelection = PIPELINE_SELECTION_3D; + TheStructure.Common._3DCommandSubOpcode = + _3D_COMMAND_SUB_OPCODE_PIPELINE_SELECT; + TheStructure.Common._3DCommandOpcode = + _3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED; + TheStructure.Common.CommandSubtype = COMMAND_SUBTYPE_GFXPIPE_SINGLE_DW; + TheStructure.Common.CommandType = COMMAND_TYPE_GFXPIPE; + } + static tagPIPELINE_SELECT sInit(void) { + PIPELINE_SELECT state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 1); + return TheStructure.RawData[index]; + } + inline void setPipelineSelection(const PIPELINE_SELECTION value) { + TheStructure.Common.PipelineSelection = value; + } + inline PIPELINE_SELECTION getPipelineSelection(void) 
const { + return static_cast( + TheStructure.Common.PipelineSelection); + } + inline void setRenderSliceCommonPowerGateEnable(const bool value) { + TheStructure.Common.RenderSliceCommonPowerGateEnable = value; + } + inline bool getRenderSliceCommonPowerGateEnable(void) const { + return TheStructure.Common.RenderSliceCommonPowerGateEnable; + } + inline void setRenderSamplerPowerGateEnable(const bool value) { + TheStructure.Common.RenderSamplerPowerGateEnable = value; + } + inline bool getRenderSamplerPowerGateEnable(void) const { + return TheStructure.Common.RenderSamplerPowerGateEnable; + } + inline void setMediaSamplerDopClockGateEnable(const bool value) { + TheStructure.Common.MediaSamplerDopClockGateEnable = value; + } + inline bool getMediaSamplerDopClockGateEnable(void) const { + return TheStructure.Common.MediaSamplerDopClockGateEnable; + } + inline void setMediaSamplerPowerClockGateDisable(const bool value) { + TheStructure.Common.MediaSamplerPowerClockGateDisable = value; + } + inline bool getMediaSamplerPowerClockGateDisable(void) const { + return TheStructure.Common.MediaSamplerPowerClockGateDisable; + } + inline void setSystolicModeEnable(const bool value) { + TheStructure.Common.SystolicModeEnable = value; + } + inline bool getSystolicModeEnable(void) const { + return TheStructure.Common.SystolicModeEnable; + } + inline void setMaskBits(const uint32_t value) { + DEBUG_BREAK_IF(value > 0xff00); + TheStructure.Common.MaskBits = value; + } + inline uint32_t getMaskBits(void) const { + return TheStructure.Common.MaskBits; + } +} PIPELINE_SELECT; +STATIC_ASSERT(4 == sizeof(PIPELINE_SELECT)); + +typedef struct tagSTATE_COMPUTE_MODE { + union tagTheStructure { + struct tagCommon { + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t Reserved_8 : BITFIELD_RANGE(8, 15); + uint32_t _3DCommandSubOpcode : BITFIELD_RANGE(16, 23); + uint32_t _3DCommandOpcode : BITFIELD_RANGE(24, 26); + uint32_t CommandSubtype : BITFIELD_RANGE(27, 28); + uint32_t CommandType : 
BITFIELD_RANGE(29, 31); + uint32_t DisableSupportForMultiGpuFence : BITFIELD_RANGE(0, 0); + uint32_t ForceDisableSupportForMultiGpuAtomics : BITFIELD_RANGE(1, 1); + uint32_t ForceDisableSupportForMultiGpuPartialWrites + : BITFIELD_RANGE(2, 2); + uint32_t ForceNonCoherent : BITFIELD_RANGE(3, 4); + uint32_t Reserved_37 : BITFIELD_RANGE(5, 9); + uint32_t BindingTableAlignment : BITFIELD_RANGE(10, 10); + uint32_t DisableAtomicOnClearData : BITFIELD_RANGE(11, 11); + uint32_t CoherentAccessL1CacheDisable : BITFIELD_RANGE(12, 12); + uint32_t DisableL1InvalidateForNonL1CacheableWrites + : BITFIELD_RANGE(13, 13); + uint32_t Reserved_46 : BITFIELD_RANGE(14, 14); + uint32_t LargeGrfMode : BITFIELD_RANGE(15, 15); + uint32_t MaskBits : BITFIELD_RANGE(16, 31); + } Common; + uint32_t RawData[2]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_EXCLUDES_DWORD_0_1 = 0x0, + } DWORD_LENGTH; + typedef enum tag_3D_COMMAND_SUB_OPCODE { + _3D_COMMAND_SUB_OPCODE_STATE_COMPUTE_MODE = 0x5, + } _3D_COMMAND_SUB_OPCODE; + typedef enum tag_3D_COMMAND_OPCODE { + _3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED = 0x1, + } _3D_COMMAND_OPCODE; + typedef enum tagCOMMAND_SUBTYPE { + COMMAND_SUBTYPE_GFXPIPE_COMMON = 0x0, + } COMMAND_SUBTYPE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_GFXPIPE = 0x3, + } COMMAND_TYPE; + typedef enum tagFORCE_NON_COHERENT { + FORCE_NON_COHERENT_FORCE_DISABLED = 0x0, + FORCE_NON_COHERENT_FORCE_CPU_NON_COHERENT = 0x1, + FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT = 0x2, + } FORCE_NON_COHERENT; + typedef enum tagBINDING_TABLE_ALIGNMENT { + BINDING_TABLE_ALIGNMENT_LEGACY = 0x0, + } BINDING_TABLE_ALIGNMENT; + typedef enum tagDISABLE_ATOMIC_ON_CLEAR_DATA { + DISABLE_ATOMIC_ON_CLEAR_DATA_ENABLE = 0x0, + DISABLE_ATOMIC_ON_CLEAR_DATA_DISABLE = 0x1, + } DISABLE_ATOMIC_ON_CLEAR_DATA; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_EXCLUDES_DWORD_0_1; + TheStructure.Common._3DCommandSubOpcode = 
+ _3D_COMMAND_SUB_OPCODE_STATE_COMPUTE_MODE; + TheStructure.Common._3DCommandOpcode = + _3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED; + TheStructure.Common.CommandSubtype = COMMAND_SUBTYPE_GFXPIPE_COMMON; + TheStructure.Common.CommandType = COMMAND_TYPE_GFXPIPE; + TheStructure.Common.ForceNonCoherent = FORCE_NON_COHERENT_FORCE_DISABLED; + TheStructure.Common.BindingTableAlignment = BINDING_TABLE_ALIGNMENT_LEGACY; + TheStructure.Common.DisableAtomicOnClearData = + DISABLE_ATOMIC_ON_CLEAR_DATA_ENABLE; + TheStructure.Common.ForceDisableSupportForMultiGpuAtomics = 1; + TheStructure.Common.ForceDisableSupportForMultiGpuPartialWrites = 1; + } + static tagSTATE_COMPUTE_MODE sInit(void) { + STATE_COMPUTE_MODE state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + DEBUG_BREAK_IF(index >= 2); + return TheStructure.RawData[index]; + } + inline void setDisableSupportForMultiGpuFence(const bool value) { + TheStructure.Common.DisableSupportForMultiGpuFence = value; + } + inline bool getDisableSupportForMultiGpuFence(void) const { + return TheStructure.Common.DisableSupportForMultiGpuFence; + } + inline void setForceDisableSupportForMultiGpuAtomics(const bool value) { + TheStructure.Common.ForceDisableSupportForMultiGpuAtomics = value; + } + inline bool getForceDisableSupportForMultiGpuAtomics(void) const { + return TheStructure.Common.ForceDisableSupportForMultiGpuAtomics; + } + inline void setForceDisableSupportForMultiGpuPartialWrites(const bool value) { + TheStructure.Common.ForceDisableSupportForMultiGpuPartialWrites = value; + } + inline bool getForceDisableSupportForMultiGpuPartialWrites(void) const { + return TheStructure.Common.ForceDisableSupportForMultiGpuPartialWrites; + } + inline void setForceNonCoherent(const FORCE_NON_COHERENT value) { + TheStructure.Common.ForceNonCoherent = value; + } + inline FORCE_NON_COHERENT getForceNonCoherent(void) const { + return static_cast( + TheStructure.Common.ForceNonCoherent); + } + inline 
void setBindingTableAlignment(const BINDING_TABLE_ALIGNMENT value) { + TheStructure.Common.BindingTableAlignment = value; + } + inline BINDING_TABLE_ALIGNMENT getBindingTableAlignment(void) const { + return static_cast( + TheStructure.Common.BindingTableAlignment); + } + inline void + setDisableAtomicOnClearData(const DISABLE_ATOMIC_ON_CLEAR_DATA value) { + TheStructure.Common.DisableAtomicOnClearData = value; + } + inline DISABLE_ATOMIC_ON_CLEAR_DATA getDisableAtomicOnClearData(void) const { + return static_cast( + TheStructure.Common.DisableAtomicOnClearData); + } + inline void setCoherentAccessL1CacheDisable(const uint32_t value) { + DEBUG_BREAK_IF(value > 0x1000); + TheStructure.Common.CoherentAccessL1CacheDisable = value; + } + inline uint32_t getCoherentAccessL1CacheDisable(void) const { + return TheStructure.Common.CoherentAccessL1CacheDisable; + } + inline void + setDisableL1InvalidateForNonL1CacheableWrites(const uint32_t value) { + DEBUG_BREAK_IF(value > 0x2000); + TheStructure.Common.DisableL1InvalidateForNonL1CacheableWrites = value; + } + inline uint32_t getDisableL1InvalidateForNonL1CacheableWrites(void) const { + return TheStructure.Common.DisableL1InvalidateForNonL1CacheableWrites; + } + inline void setLargeGrfMode(const bool value) { + TheStructure.Common.LargeGrfMode = value; + } + inline bool getLargeGrfMode(void) const { + return TheStructure.Common.LargeGrfMode; + } + inline void setMaskBits(const uint32_t value) { + DEBUG_BREAK_IF(value > 0xffff0000L); + TheStructure.Common.MaskBits = value; + } + inline uint32_t getMaskBits(void) const { + return TheStructure.Common.MaskBits; + } +} STATE_COMPUTE_MODE; +STATIC_ASSERT(8 == sizeof(STATE_COMPUTE_MODE)); + +typedef struct tag_3DSTATE_BINDING_TABLE_POOL_ALLOC { + union tagTheStructure { + struct tagCommon { + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t Reserved_8 : BITFIELD_RANGE(8, 15); + uint32_t _3DCommandSubOpcode : BITFIELD_RANGE(16, 23); + uint32_t _3DCommandOpcode : 
BITFIELD_RANGE(24, 26); + uint32_t CommandSubtype : BITFIELD_RANGE(27, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + uint64_t SurfaceObjectControlStateEncryptedData : BITFIELD_RANGE(0, 0); + uint64_t SurfaceObjectControlStateIndexToMocsTables : BITFIELD_RANGE(1, 6); + uint64_t Reserved_39 : BITFIELD_RANGE(7, 9); + uint64_t Reserved_42 : BITFIELD_RANGE(10, 10); + uint64_t Reserved_43 : BITFIELD_RANGE(11, 11); + uint64_t BindingTablePoolBaseAddress : BITFIELD_RANGE(12, 47); + uint64_t BindingTablePoolBaseAddressReserved_80 : BITFIELD_RANGE(48, 63); + uint32_t Reserved_96 : BITFIELD_RANGE(0, 11); + uint32_t BindingTablePoolBufferSize : BITFIELD_RANGE(12, 31); + } Common; + uint32_t RawData[4]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_DWORD_COUNT_N = 0x2, + } DWORD_LENGTH; + typedef enum tag_3D_COMMAND_SUB_OPCODE { + _3D_COMMAND_SUB_OPCODE_3DSTATE_BINDING_TABLE_POOL_ALLOC = 0x19, + } _3D_COMMAND_SUB_OPCODE; + typedef enum tag_3D_COMMAND_OPCODE { + _3D_COMMAND_OPCODE_3DSTATE_NONPIPELINED = 0x1, + } _3D_COMMAND_OPCODE; + typedef enum tagCOMMAND_SUBTYPE { + COMMAND_SUBTYPE_GFXPIPE_3D = 0x3, + } COMMAND_SUBTYPE; + typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_GFXPIPE = 0x3, + } COMMAND_TYPE; + + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_DWORD_COUNT_N; + TheStructure.Common._3DCommandSubOpcode = _3D_COMMAND_SUB_OPCODE_3DSTATE_BINDING_TABLE_POOL_ALLOC; + TheStructure.Common._3DCommandOpcode = _3D_COMMAND_OPCODE_3DSTATE_NONPIPELINED; + TheStructure.Common.CommandSubtype = COMMAND_SUBTYPE_GFXPIPE_3D; + TheStructure.Common.CommandType = COMMAND_TYPE_GFXPIPE; + TheStructure.Common.BindingTablePoolBufferSize = 0; + } + static tag_3DSTATE_BINDING_TABLE_POOL_ALLOC sInit(void) { + _3DSTATE_BINDING_TABLE_POOL_ALLOC state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + UNRECOVERABLE_IF(index >= 4); + return 
TheStructure.RawData[index]; + } + inline void setSurfaceObjectControlStateEncryptedData(const bool value) { + TheStructure.Common.SurfaceObjectControlStateEncryptedData = value; + } + inline bool getSurfaceObjectControlStateEncryptedData(void) const { + return TheStructure.Common.SurfaceObjectControlStateEncryptedData; + } + inline void setSurfaceObjectControlStateIndexToMocsTables(const uint64_t value) { + UNRECOVERABLE_IF(value > 0x7eL); + TheStructure.Common.SurfaceObjectControlStateIndexToMocsTables = value >> 1; + } + inline uint64_t getSurfaceObjectControlStateIndexToMocsTables(void) const { + return (TheStructure.Common.SurfaceObjectControlStateIndexToMocsTables << 1); + } + typedef enum tagBINDINGTABLEPOOLBASEADDRESS { + BINDINGTABLEPOOLBASEADDRESS_BIT_SHIFT = 0xc, + BINDINGTABLEPOOLBASEADDRESS_ALIGN_SIZE = 0x1000, + } BINDINGTABLEPOOLBASEADDRESS; + inline void setBindingTablePoolBaseAddress(const uint64_t value) { + TheStructure.Common.BindingTablePoolBaseAddress = value >> BINDINGTABLEPOOLBASEADDRESS_BIT_SHIFT; + } + inline uint64_t getBindingTablePoolBaseAddress(void) const { + return TheStructure.Common.BindingTablePoolBaseAddress << BINDINGTABLEPOOLBASEADDRESS_BIT_SHIFT; + } + inline void setBindingTablePoolBufferSize(const uint32_t value) { TheStructure.Common.BindingTablePoolBufferSize = value; } + inline uint32_t getBindingTablePoolBufferSize(void) const { return TheStructure.Common.BindingTablePoolBufferSize; } +} _3DSTATE_BINDING_TABLE_POOL_ALLOC; +STATIC_ASSERT(16 == sizeof(_3DSTATE_BINDING_TABLE_POOL_ALLOC)); + +typedef struct tagL3_FLUSH_ADDRESS_RANGE { + union tagTheStructure { + struct tagCommon { + uint64_t Reserved_0 : BITFIELD_RANGE(0, 2); + uint64_t AddressMask : BITFIELD_RANGE(3, 8); + uint64_t Reserved_9 : BITFIELD_RANGE(9, 11); + uint64_t AddressLow : BITFIELD_RANGE(12, 31); + uint64_t AddressHigh : BITFIELD_RANGE(32, 47); + uint64_t Reserved_48 : BITFIELD_RANGE(48, 59); + uint64_t L3FlushEvictionPolicy : BITFIELD_RANGE(60, 61); + 
uint64_t Reserved_62 : BITFIELD_RANGE(62, 63); + } Common; + uint32_t RawData[2]; + } TheStructure; + typedef enum tagL3_FLUSH_EVICTION_POLICY { + L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION = 0x0, + L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_OUT_EVICTION = 0x1, + L3_FLUSH_EVICTION_POLICY_DISCARD = 0x2, + } L3_FLUSH_EVICTION_POLICY; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.L3FlushEvictionPolicy = L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION; + } + static tagL3_FLUSH_ADDRESS_RANGE sInit(void) { + L3_FLUSH_ADDRESS_RANGE state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + UNRECOVERABLE_IF(index >= 2); + return TheStructure.RawData[index]; + } + + typedef enum tagADDRESSLOW { + ADDRESSLOW_BIT_SHIFT = 0xC, + ADDRESSLOW_ALIGN_SIZE = 0x1000, + } ADDRESSLOW; + + inline void setAddressLow(const uint64_t value) { + TheStructure.Common.AddressLow = value >> ADDRESSLOW_BIT_SHIFT; + } + + inline uint64_t getAddressLow(void) const { + return (TheStructure.Common.AddressLow << ADDRESSLOW_BIT_SHIFT); + } + + inline void setAddressHigh(const uint64_t value) { + TheStructure.Common.AddressHigh = value; + } + + inline uint64_t getAddressHigh() const { + return (TheStructure.Common.AddressHigh); + } + + inline void setAddress(const uint64_t value) { + setAddressLow(static_cast(value)); + setAddressHigh(static_cast(value >> 32)); + } + + inline uint64_t getAddress() const { + return static_cast(getAddressLow()) | (static_cast(getAddressHigh()) << 32); + } + + inline void setL3FlushEvictionPolicy(const L3_FLUSH_EVICTION_POLICY value) { + TheStructure.Common.L3FlushEvictionPolicy = value; + } + inline L3_FLUSH_EVICTION_POLICY getL3FlushEvictionPolicy(void) const { + return static_cast(TheStructure.Common.L3FlushEvictionPolicy); + } + inline void setAddressMask(const uint64_t value) { + UNRECOVERABLE_IF(value > 0x1f8); + TheStructure.Common.AddressMask = value; + } + inline uint32_t 
getAddressMask(void) const { + return TheStructure.Common.AddressMask; + } +} L3_FLUSH_ADDRESS_RANGE; +STATIC_ASSERT(8 == sizeof(L3_FLUSH_ADDRESS_RANGE)); + +struct L3_CONTROL_POST_SYNC_DATA { + union tagTheStructure { + struct tagCommon { + uint64_t Reserved_96 : BITFIELD_RANGE(0, 2); + uint64_t Address : BITFIELD_RANGE(3, 47); + uint64_t Reserved_144 : BITFIELD_RANGE(48, 63); + uint64_t ImmediateData; + } Common; + uint32_t RawData[4]; + } TheStructure; + + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + } + + typedef enum tagADDRESS { + ADDRESS_BIT_SHIFT = 0x3, + ADDRESS_ALIGN_SIZE = 0x8, + } ADDRESS; + inline void setAddress(const uint64_t value) { + TheStructure.Common.Address = value >> ADDRESS_BIT_SHIFT; + } + inline uint64_t getAddress(void) const { + return TheStructure.Common.Address << ADDRESS_BIT_SHIFT; + } + inline void setImmediateData(const uint64_t value) { + TheStructure.Common.ImmediateData = value; + } + inline uint64_t getImmediateData(void) const { + return TheStructure.Common.ImmediateData; + } +}; + +struct L3_CONTROL { + union tagTheStructure { + struct tagCommon { + uint32_t Length : BITFIELD_RANGE(0, 7); + uint32_t DepthCacheFlush : BITFIELD_RANGE(8, 8); + uint32_t RenderTargetCacheFlushEnable : BITFIELD_RANGE(9, 9); + uint32_t HdcPipelineFlush : BITFIELD_RANGE(10, 10); + uint32_t Reserved_11 : BITFIELD_RANGE(11, 13); + uint32_t PostSyncOperation : BITFIELD_RANGE(14, 14); + uint32_t PostSyncOperationL3CacheabilityControl : BITFIELD_RANGE(15, 15); // removed on DG1 + uint32_t Reserved_16 : BITFIELD_RANGE(16, 19); + uint32_t CommandStreamerStallEnable : BITFIELD_RANGE(20, 20); + uint32_t DestinationAddressType : BITFIELD_RANGE(21, 21); + uint32_t Reserved_22 : BITFIELD_RANGE(22, 22); + uint32_t _3DCommandSubOpcode : BITFIELD_RANGE(23, 23); + uint32_t _3DCommandOpcode : BITFIELD_RANGE(24, 26); + uint32_t CommandSubtype : BITFIELD_RANGE(27, 28); + uint32_t Type : BITFIELD_RANGE(29, 31); + L3_CONTROL_POST_SYNC_DATA 
PostSyncData; + } Common; + uint32_t RawData[5]; + } TheStructure; + + typedef enum tagRENDER_TARGET_CACHE_FLUSH_ENABLE { + RENDER_TARGET_CACHE_FLUSH_DISABLED = 0x0, + RENDER_TARGET_CACHE_FLUSH_ENABLED = 0x1, + } RENDER_TARGET_CACHE_FLUSH_ENABLE; + typedef enum tagPOST_SYNC_OPERATION { + POST_SYNC_OPERATION_NO_WRITE = 0x0, + POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA = 0x1, + } POST_SYNC_OPERATION; + typedef enum tagPOST_SYNC_OPERATION_L3_CACHEABILITY_CONTROL { + POST_SYNC_OPERATION_L3_CACHEABILITY_CONTROL_DEFAULT_MOCS = 0x0, + POST_SYNC_OPERATION_L3_CACHEABILITY_CONTROL_CACHEABLE_MOCS = 0x1, + } POST_SYNC_OPERATION_L3_CACHEABILITY_CONTROL; + typedef enum tagDESTINATION_ADDRESS_TYPE { + DESTINATION_ADDRESS_TYPE_PPGTT = 0x0, + DESTINATION_ADDRESS_TYPE_GGTT = 0x1, + } DESTINATION_ADDRESS_TYPE; + typedef enum tag_3D_COMMAND_SUB_OPCODE { + _3D_COMMAND_SUB_OPCODE_L3_CONTROL = 0x1, + } _3D_COMMAND_SUB_OPCODE; + typedef enum tag_3D_COMMAND_OPCODE { + _3D_COMMAND_OPCODE_L3_CONTROL = 0x5, + } _3D_COMMAND_OPCODE; + typedef enum tagCOMMAND_SUBTYPE { + COMMAND_SUBTYPE_GFXPIPE_3D = 0x3, + } COMMAND_SUBTYPE; + typedef enum tagTYPE { + TYPE_GFXPIPE = 0x3, + } TYPE; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.RenderTargetCacheFlushEnable = RENDER_TARGET_CACHE_FLUSH_ENABLED; + TheStructure.Common.PostSyncOperation = POST_SYNC_OPERATION_NO_WRITE; + TheStructure.Common.PostSyncOperationL3CacheabilityControl = POST_SYNC_OPERATION_L3_CACHEABILITY_CONTROL_DEFAULT_MOCS; + TheStructure.Common.DestinationAddressType = DESTINATION_ADDRESS_TYPE_PPGTT; + TheStructure.Common._3DCommandSubOpcode = _3D_COMMAND_SUB_OPCODE_L3_CONTROL; + TheStructure.Common._3DCommandOpcode = _3D_COMMAND_OPCODE_L3_CONTROL; + TheStructure.Common.CommandSubtype = COMMAND_SUBTYPE_GFXPIPE_3D; + TheStructure.Common.Type = TYPE_GFXPIPE; + TheStructure.Common.PostSyncData.init(); + } + static L3_CONTROL sInit(void) { + L3_CONTROL state; + state.init(); + return state; + 
} + inline uint32_t &getRawData(const uint32_t index) { /* raw dword access; RawData has exactly 5 dwords (sizeof(L3_CONTROL) == 20), so valid indices are 0..4 */ + UNRECOVERABLE_IF(index >= 5); + return TheStructure.RawData[index]; + } + inline void setLength(const uint32_t value) { /* writes the 8-bit Length field (bits 0..7 of dword 0) */ + TheStructure.Common.Length = value; + } + inline uint32_t getLength(void) const { + return TheStructure.Common.Length; + } + inline void setDepthCacheFlush(const bool value) { /* single-bit flag, bit 8 */ + TheStructure.Common.DepthCacheFlush = value; + } + inline bool getDepthCacheFlush(void) const { + return TheStructure.Common.DepthCacheFlush; + } + inline void setRenderTargetCacheFlushEnable(const bool value) { /* single-bit flag, bit 9; init() sets it to ENABLED */ + TheStructure.Common.RenderTargetCacheFlushEnable = value; + } + inline bool getRenderTargetCacheFlushEnable(void) const { + return TheStructure.Common.RenderTargetCacheFlushEnable; + } + inline void setHdcPipelineFlush(const bool value) { /* single-bit flag, bit 10 */ + TheStructure.Common.HdcPipelineFlush = value; + } + inline bool getHdcPipelineFlush(void) const { + return TheStructure.Common.HdcPipelineFlush; + } + inline void setPostSyncOperation(const POST_SYNC_OPERATION value) { /* single-bit field, bit 14: NO_WRITE or WRITE_IMMEDIATE_DATA */ + TheStructure.Common.PostSyncOperation = value; + } + inline POST_SYNC_OPERATION getPostSyncOperation(void) const { + return static_cast<POST_SYNC_OPERATION>(TheStructure.Common.PostSyncOperation); + } + inline void setPostSyncOperationL3CacheabilityControl(const POST_SYNC_OPERATION_L3_CACHEABILITY_CONTROL value) { /* single-bit field, bit 15 */ + TheStructure.Common.PostSyncOperationL3CacheabilityControl = value; + } + inline POST_SYNC_OPERATION_L3_CACHEABILITY_CONTROL getPostSyncOperationL3CacheabilityControl(void) const { + return static_cast<POST_SYNC_OPERATION_L3_CACHEABILITY_CONTROL>(TheStructure.Common.PostSyncOperationL3CacheabilityControl); + } + inline void setCommandStreamerStallEnable(const bool value) { /* single-bit flag, bit 20 */ + TheStructure.Common.CommandStreamerStallEnable = value; + } + inline bool getCommandStreamerStallEnable(void) const { + return TheStructure.Common.CommandStreamerStallEnable; + } + inline void setDestinationAddressType(const DESTINATION_ADDRESS_TYPE value) { /* single-bit field, bit 21: PPGTT or GGTT */ + TheStructure.Common.DestinationAddressType = value; + } + inline 
DESTINATION_ADDRESS_TYPE getDestinationAddressType(void) const { + return static_cast(TheStructure.Common.DestinationAddressType); + } + inline void setType(const TYPE value) { + TheStructure.Common.Type = value; + } + inline TYPE getType(void) const { + return static_cast(TheStructure.Common.Type); + } + L3_CONTROL_POST_SYNC_DATA &getPostSyncData() { + return TheStructure.Common.PostSyncData; + } + + const L3_CONTROL_POST_SYNC_DATA &getPostSyncData() const { + return TheStructure.Common.PostSyncData; + } + + inline void setPostSyncAddress(const uint64_t value) { + getPostSyncData().setAddress(value); + } + + inline uint64_t getPostSyncAddress(void) const { + return getPostSyncData().getAddress(); + } + + inline void setPostSyncImmediateData(const uint64_t value) { + getPostSyncData().setImmediateData(value); + } + + inline uint64_t getPostSyncImmediateData(void) const { + return getPostSyncData().getImmediateData(); + } +}; +STATIC_ASSERT(20 == sizeof(L3_CONTROL)); +STATIC_ASSERT(std::is_pod::value); + +typedef struct tagSTATE_SIP { + union tagTheStructure { + struct tagCommon { + // DWORD 0 + uint32_t DwordLength : BITFIELD_RANGE(0, 7); + uint32_t Reserved_8 : BITFIELD_RANGE(8, 15); + uint32_t _3DCommandSubOpcode : BITFIELD_RANGE(16, 23); + uint32_t _3DCommandOpcode : BITFIELD_RANGE(24, 26); + uint32_t CommandSubtype : BITFIELD_RANGE(27, 28); + uint32_t CommandType : BITFIELD_RANGE(29, 31); + // DWORD 1 + uint64_t Reserved_32 : BITFIELD_RANGE(0, 3); + uint64_t SystemInstructionPointer : BITFIELD_RANGE(4, 63); + } Common; + uint32_t RawData[3]; + } TheStructure; + typedef enum tagDWORD_LENGTH { + DWORD_LENGTH_DWORD_COUNT_N = 0x1, + } DWORD_LENGTH; + typedef enum tag_3D_COMMAND_SUB_OPCODE { + _3D_COMMAND_SUB_OPCODE_STATE_SIP = 0x2, + } _3D_COMMAND_SUB_OPCODE; + typedef enum tag_3D_COMMAND_OPCODE { + _3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED = 0x1, + } _3D_COMMAND_OPCODE; + typedef enum tagCOMMAND_SUBTYPE { + COMMAND_SUBTYPE_GFXPIPE_COMMON = 0x0, + } COMMAND_SUBTYPE; 
+ typedef enum tagCOMMAND_TYPE { + COMMAND_TYPE_GFXPIPE = 0x3, + } COMMAND_TYPE; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.DwordLength = DWORD_LENGTH_DWORD_COUNT_N; + TheStructure.Common._3DCommandSubOpcode = _3D_COMMAND_SUB_OPCODE_STATE_SIP; + TheStructure.Common._3DCommandOpcode = _3D_COMMAND_OPCODE_GFXPIPE_NONPIPELINED; + TheStructure.Common.CommandSubtype = COMMAND_SUBTYPE_GFXPIPE_COMMON; + TheStructure.Common.CommandType = COMMAND_TYPE_GFXPIPE; + } + static tagSTATE_SIP sInit(void) { + STATE_SIP state; + state.init(); + return state; + } + inline uint32_t &getRawData(const uint32_t index) { + UNRECOVERABLE_IF(index >= 3); + return TheStructure.RawData[index]; + } + typedef enum tagSYSTEMINSTRUCTIONPOINTER { + SYSTEMINSTRUCTIONPOINTER_BIT_SHIFT = 0x4, + SYSTEMINSTRUCTIONPOINTER_ALIGN_SIZE = 0x10, + } SYSTEMINSTRUCTIONPOINTER; + inline void setSystemInstructionPointer(const uint64_t value) { + UNRECOVERABLE_IF(value > 0xffffffffffffffffL); + TheStructure.Common.SystemInstructionPointer = value >> SYSTEMINSTRUCTIONPOINTER_BIT_SHIFT; + } + inline uint64_t getSystemInstructionPointer(void) const { + return TheStructure.Common.SystemInstructionPointer << SYSTEMINSTRUCTIONPOINTER_BIT_SHIFT; + } +} STATE_SIP; +STATIC_ASSERT(12 == sizeof(STATE_SIP)); + +typedef struct tagSAMPLER_BORDER_COLOR_STATE { + union tagTheStructure { + struct tagCommon { + // DWORD 0 + float BorderColorRed; + // DWORD 1 + float BorderColorGreen; + // DWORD 2 + float BorderColorBlue; + // DWORD 3 + float BorderColorAlpha; + } Common; + uint32_t RawData[4]; + } TheStructure; + inline void init(void) { + memset(&TheStructure, 0, sizeof(TheStructure)); + TheStructure.Common.BorderColorRed = 0.0; + TheStructure.Common.BorderColorGreen = 0.0; + TheStructure.Common.BorderColorBlue = 0.0; + TheStructure.Common.BorderColorAlpha = 0.0; + } + static tagSAMPLER_BORDER_COLOR_STATE sInit(void) { + SAMPLER_BORDER_COLOR_STATE state; + state.init(); + 
return state; + } + inline uint32_t &getRawData(const uint32_t index) { + UNRECOVERABLE_IF(index >= 4); + return TheStructure.RawData[index]; + } + inline void setBorderColorRed(const float value) { + TheStructure.Common.BorderColorRed = value; + } + inline float getBorderColorRed(void) const { + return TheStructure.Common.BorderColorRed; + } + inline void setBorderColorGreen(const float value) { + TheStructure.Common.BorderColorGreen = value; + } + inline float getBorderColorGreen(void) const { + return TheStructure.Common.BorderColorGreen; + } + inline void setBorderColorBlue(const float value) { + TheStructure.Common.BorderColorBlue = value; + } + inline float getBorderColorBlue(void) const { + return TheStructure.Common.BorderColorBlue; + } + inline void setBorderColorAlpha(const float value) { + TheStructure.Common.BorderColorAlpha = value; + } + inline float getBorderColorAlpha(void) const { + return TheStructure.Common.BorderColorAlpha; + } +} SAMPLER_BORDER_COLOR_STATE; +STATIC_ASSERT(16 == sizeof(SAMPLER_BORDER_COLOR_STATE)); + +#pragma pack() diff --git a/shared/source/gmm_helper/gmm.h b/shared/source/gmm_helper/gmm.h index ad8763fd6e..8a03a227d5 100644 --- a/shared/source/gmm_helper/gmm.h +++ b/shared/source/gmm_helper/gmm.h @@ -7,8 +7,7 @@ #pragma once #include "shared/source/gmm_helper/gmm_lib.h" - -#include "storage_info.h" +#include "shared/source/memory_manager/definitions/storage_info.h" #include #include diff --git a/shared/source/helpers/CMakeLists.txt b/shared/source/helpers/CMakeLists.txt index 4ff8379018..892a570c5b 100644 --- a/shared/source/helpers/CMakeLists.txt +++ b/shared/source/helpers/CMakeLists.txt @@ -10,6 +10,8 @@ set(NEO_CORE_HELPERS ${CMAKE_CURRENT_SOURCE_DIR}/address_patch.h ${CMAKE_CURRENT_SOURCE_DIR}/affinity_mask.h ${CMAKE_CURRENT_SOURCE_DIR}/aligned_memory.h + ${CMAKE_CURRENT_SOURCE_DIR}/app_resource_defines.h + ${CMAKE_CURRENT_SOURCE_DIR}/app_resource_helper.h ${CMAKE_CURRENT_SOURCE_DIR}/api_specific_config.h 
${CMAKE_CURRENT_SOURCE_DIR}/array_count.h ${CMAKE_CURRENT_SOURCE_DIR}/aux_translation.h @@ -110,14 +112,37 @@ set(NEO_CORE_HELPERS ${CMAKE_CURRENT_SOURCE_DIR}/definitions/mi_flush_args.h ${CMAKE_CURRENT_SOURCE_DIR}/definitions/pipe_control_args_base.h ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/pipe_control_args.h + ${CMAKE_CURRENT_SOURCE_DIR}/${BRANCH_DIR_SUFFIX}/pipe_control_args.cpp ) -set_property(GLOBAL PROPERTY NEO_CORE_HELPERS ${NEO_CORE_HELPERS}) -set(NEO_CORE_SRCS_HELPERS_WINDOWS +if(SUPPORT_XEHP_PLUS) + list(APPEND NEO_CORE_HELPERS + ${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper_xehp_plus.inl + ${CMAKE_CURRENT_SOURCE_DIR}/cache_flush_xehp_plus.inl + ${CMAKE_CURRENT_SOURCE_DIR}/extra_allocation_data_xehp_plus.inl + ${CMAKE_CURRENT_SOURCE_DIR}/hw_helper_xehp_plus.inl + ${CMAKE_CURRENT_SOURCE_DIR}/preamble_xehp_plus.inl + ${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_xehp_plus.inl + ) +endif() + +if(WIN32) + list(APPEND NEO_CORE_HELPERS + ${CMAKE_CURRENT_SOURCE_DIR}/windows/app_resource_helper.cpp + ) +else() + list(APPEND NEO_CORE_HELPERS + ${CMAKE_CURRENT_SOURCE_DIR}/linux/app_resource_helper.cpp + ) +endif() + +set(NEO_CORE_HELPERS_GMM_CALLBACKS_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.cpp ${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.h ${CMAKE_CURRENT_SOURCE_DIR}/windows/gmm_callbacks.inl ) -set_property(GLOBAL PROPERTY NEO_CORE_SRCS_HELPERS_WINDOWS ${NEO_CORE_SRCS_HELPERS_WINDOWS}) + +set_property(GLOBAL PROPERTY NEO_CORE_HELPERS_GMM_CALLBACKS_WINDOWS ${NEO_CORE_HELPERS_GMM_CALLBACKS_WINDOWS}) +set_property(GLOBAL PROPERTY NEO_CORE_HELPERS ${NEO_CORE_HELPERS}) add_subdirectories() diff --git a/shared/source/helpers/app_resource_defines.h b/shared/source/helpers/app_resource_defines.h new file mode 100644 index 0000000000..0d5ecab9d8 --- /dev/null +++ b/shared/source/helpers/app_resource_defines.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma 
once +#include +#include + +namespace NEO { +namespace AppResourceDefines { +#if defined(_DEBUG) || (_RELEASE_INTERNAL) +constexpr bool resourceTagSupport = true; +#else +constexpr bool resourceTagSupport = false; +#endif + +template +static constexpr bool has_ResourceTag = false; + +template +static constexpr bool has_ResourceTag = true; + +constexpr uint32_t maxStrLen = 8u; +} // namespace AppResourceDefines +} // namespace NEO diff --git a/shared/source/helpers/app_resource_helper.h b/shared/source/helpers/app_resource_helper.h new file mode 100644 index 0000000000..3e393ee0ef --- /dev/null +++ b/shared/source/helpers/app_resource_helper.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/memory_manager/graphics_allocation.h" + +namespace NEO { +struct AppResourceHelper { /* static-only helper: both members are declared here and defined in a separate translation unit */ + public: + static const char *getResourceTagStr(GraphicsAllocation::AllocationType type); + static void copyResourceTagStr(char *dst, GraphicsAllocation::AllocationType type, size_t size); +}; + +} // namespace NEO diff --git a/shared/source/helpers/blit_commands_helper_xehp_plus.inl b/shared/source/helpers/blit_commands_helper_xehp_plus.inl new file mode 100644 index 0000000000..138d5578d0 --- /dev/null +++ b/shared/source/helpers/blit_commands_helper_xehp_plus.inl @@ -0,0 +1,392 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/execution_environment/execution_environment.h" +#include "shared/source/execution_environment/root_device_environment.h" +#include "shared/source/gmm_helper/gmm.h" +#include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/source/helpers/blit_commands_helper_base.inl" + +#include "opencl/source/helpers/hardware_commands_helper.h" + +#include "gmm_client_context.h" + +namespace NEO { + +template +uint64_t BlitCommandsHelper::getMaxBlitWidthOverride(const RootDeviceEnvironment &rootDeviceEnvironment) { + 
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo(); + HwHelper &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + if (hwHelper.getLocalMemoryAccessMode(hwInfo) == LocalMemoryAccessMode::CpuAccessAllowed) { + return 1024; + } + return 0; +} + +template +uint64_t BlitCommandsHelper::getMaxBlitHeightOverride(const RootDeviceEnvironment &rootDeviceEnvironment) { + auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo(); + HwHelper &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + if (hwHelper.getLocalMemoryAccessMode(hwInfo) == LocalMemoryAccessMode::CpuAccessAllowed) { + return 1024; + } + return 0; +} + +template +void BlitCommandsHelper::appendBlitCommandsForBuffer(const BlitProperties &blitProperties, typename GfxFamily::XY_COPY_BLT &blitCmd, const RootDeviceEnvironment &rootDeviceEnvironment) { + using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT; + auto dstAllocation = blitProperties.dstAllocation; + auto srcAllocation = blitProperties.srcAllocation; + bool dstAllocationisCompressionEnabled = dstAllocation->getDefaultGmm() && dstAllocation->getDefaultGmm()->isCompressionEnabled; + bool srcAllocationisCompressionEnabled = srcAllocation->getDefaultGmm() && srcAllocation->getDefaultGmm()->isCompressionEnabled; + + appendClearColor(blitProperties, blitCmd); + + uint32_t compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(GMM_RESOURCE_FORMAT::GMM_FORMAT_GENERIC_8BIT); + if (DebugManager.flags.ForceBufferCompressionFormat.get() != -1) { + compressionFormat = DebugManager.flags.ForceBufferCompressionFormat.get(); + } + + auto compressionEnabledField = XY_COPY_BLT::COMPRESSION_ENABLE::COMPRESSION_ENABLE_COMPRESSION_ENABLE; + if (DebugManager.flags.ForceCompressionDisabledForCompressedBlitCopies.get() != -1) { + compressionEnabledField = static_cast(DebugManager.flags.ForceCompressionDisabledForCompressedBlitCopies.get()); + } + + if (dstAllocationisCompressionEnabled) { + 
blitCmd.setDestinationCompressionEnable(compressionEnabledField); + blitCmd.setDestinationAuxiliarysurfacemode(XY_COPY_BLT::AUXILIARY_SURFACE_MODE_AUX_CCS_E); + blitCmd.setDestinationCompressionFormat(compressionFormat); + } + if (srcAllocationisCompressionEnabled) { + blitCmd.setSourceCompressionEnable(compressionEnabledField); + blitCmd.setSourceAuxiliarysurfacemode(XY_COPY_BLT::AUXILIARY_SURFACE_MODE_AUX_CCS_E); + blitCmd.setSourceCompressionFormat(compressionFormat); + } + + if (MemoryPool::isSystemMemoryPool(dstAllocation->getMemoryPool())) { + blitCmd.setDestinationTargetMemory(XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); + } + if (MemoryPool::isSystemMemoryPool(srcAllocation->getMemoryPool())) { + blitCmd.setSourceTargetMemory(XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); + } + + appendExtraMemoryProperties(blitCmd, rootDeviceEnvironment); + + blitCmd.setSourceSurfaceWidth(blitCmd.getDestinationX2CoordinateRight()); + blitCmd.setSourceSurfaceHeight(blitCmd.getDestinationY2CoordinateBottom()); + + blitCmd.setDestinationSurfaceWidth(blitCmd.getDestinationX2CoordinateRight()); + blitCmd.setDestinationSurfaceHeight(blitCmd.getDestinationY2CoordinateBottom()); + + if (blitCmd.getDestinationY2CoordinateBottom() > 1) { + blitCmd.setDestinationSurfaceType(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D); + blitCmd.setSourceSurfaceType(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D); + } else { + blitCmd.setDestinationSurfaceType(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_1D); + blitCmd.setSourceSurfaceType(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_1D); + } + + if (AuxTranslationDirection::AuxToNonAux == blitProperties.auxTranslationDirection) { + blitCmd.setSpecialModeofOperation(XY_COPY_BLT::SPECIAL_MODE_OF_OPERATION::SPECIAL_MODE_OF_OPERATION_FULL_RESOLVE); + } else if (AuxTranslationDirection::NonAuxToAux == blitProperties.auxTranslationDirection) { + 
blitCmd.setSourceCompressionEnable(XY_COPY_BLT::COMPRESSION_ENABLE::COMPRESSION_ENABLE_COMPRESSION_DISABLE); + } + + DEBUG_BREAK_IF((AuxTranslationDirection::None != blitProperties.auxTranslationDirection) && + (dstAllocation != srcAllocation || !dstAllocationisCompressionEnabled)); + + auto mocsIndex = rootDeviceEnvironment.getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); + + blitCmd.setDestinationMOCSvalue(mocsIndex); + blitCmd.setSourceMOCS(mocsIndex); + if (DebugManager.flags.OverrideBlitterMocs.get() != -1) { + blitCmd.setDestinationMOCSvalue(DebugManager.flags.OverrideBlitterMocs.get()); + blitCmd.setSourceMOCS(DebugManager.flags.OverrideBlitterMocs.get()); + } + if (DebugManager.flags.OverrideBlitterTargetMemory.get() != -1) { + if (DebugManager.flags.OverrideBlitterTargetMemory.get() == 0u) { + blitCmd.setDestinationTargetMemory(XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); + blitCmd.setSourceTargetMemory(XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); + } else if (DebugManager.flags.OverrideBlitterTargetMemory.get() == 1u) { + blitCmd.setDestinationTargetMemory(XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); + blitCmd.setSourceTargetMemory(XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_LOCAL_MEM); + } + } +} + +template +void setCompressionParamsForFillOperation(typename GfxFamily::XY_COLOR_BLT &xyColorBlt) { + using XY_COLOR_BLT = typename GfxFamily::XY_COLOR_BLT; + + xyColorBlt.setDestinationCompressionEnable(XY_COLOR_BLT::DESTINATION_COMPRESSION_ENABLE::DESTINATION_COMPRESSION_ENABLE_COMPRESSION_ENABLE); + xyColorBlt.setDestinationAuxiliarysurfacemode(XY_COLOR_BLT::DESTINATION_AUXILIARY_SURFACE_MODE::DESTINATION_AUXILIARY_SURFACE_MODE_AUX_CCS_E); +} + +template +void BlitCommandsHelper::appendBlitCommandsForFillBuffer(NEO::GraphicsAllocation *dstAlloc, typename GfxFamily::XY_COLOR_BLT &blitCmd, const RootDeviceEnvironment &rootDeviceEnvironment) { + using XY_COLOR_BLT = typename GfxFamily::XY_COLOR_BLT; + 
bool dstAllocationisCompressionEnabled = dstAlloc->getDefaultGmm() && dstAlloc->getDefaultGmm()->isCompressionEnabled; + + uint32_t compressionFormat = rootDeviceEnvironment.getGmmClientContext()->getSurfaceStateCompressionFormat(GMM_RESOURCE_FORMAT::GMM_FORMAT_GENERIC_8BIT); + if (DebugManager.flags.ForceBufferCompressionFormat.get() != -1) { + compressionFormat = DebugManager.flags.ForceBufferCompressionFormat.get(); + } + + if (dstAllocationisCompressionEnabled) { + setCompressionParamsForFillOperation(blitCmd); + blitCmd.setDestinationCompressionFormat(compressionFormat); + } + + if (MemoryPool::isSystemMemoryPool(dstAlloc->getMemoryPool())) { + blitCmd.setDestinationTargetMemory(XY_COLOR_BLT::DESTINATION_TARGET_MEMORY::DESTINATION_TARGET_MEMORY_SYSTEM_MEM); + } + + appendExtraMemoryProperties(blitCmd, rootDeviceEnvironment); + + auto mocsIndex = rootDeviceEnvironment.getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); + + blitCmd.setDestinationMOCSvalue(mocsIndex); + if (DebugManager.flags.OverrideBlitterMocs.get() != -1) { + blitCmd.setDestinationMOCSvalue(DebugManager.flags.OverrideBlitterMocs.get()); + } + if (DebugManager.flags.OverrideBlitterTargetMemory.get() != -1) { + if (DebugManager.flags.OverrideBlitterTargetMemory.get() == 0u) { + blitCmd.setDestinationTargetMemory(XY_COLOR_BLT::DESTINATION_TARGET_MEMORY::DESTINATION_TARGET_MEMORY_SYSTEM_MEM); + } else if (DebugManager.flags.OverrideBlitterTargetMemory.get() == 1u) { + blitCmd.setDestinationTargetMemory(XY_COLOR_BLT::DESTINATION_TARGET_MEMORY::DESTINATION_TARGET_MEMORY_LOCAL_MEM); + } + } +} + +template +void BlitCommandsHelper::dispatchBlitMemoryColorFill(NEO::GraphicsAllocation *dstAlloc, uint32_t *pattern, size_t patternSize, LinearStream &linearStream, size_t size, const RootDeviceEnvironment &rootDeviceEnvironment) { + switch (patternSize) { + case 1: + NEO::BlitCommandsHelper::dispatchBlitMemoryFill<1>(dstAlloc, pattern, linearStream, size, rootDeviceEnvironment, 
COLOR_DEPTH::COLOR_DEPTH_8_BIT_COLOR); + break; + case 2: + NEO::BlitCommandsHelper::dispatchBlitMemoryFill<2>(dstAlloc, pattern, linearStream, size, rootDeviceEnvironment, COLOR_DEPTH::COLOR_DEPTH_16_BIT_COLOR); + break; + case 4: + NEO::BlitCommandsHelper::dispatchBlitMemoryFill<4>(dstAlloc, pattern, linearStream, size, rootDeviceEnvironment, COLOR_DEPTH::COLOR_DEPTH_32_BIT_COLOR); + break; + case 8: + NEO::BlitCommandsHelper::dispatchBlitMemoryFill<8>(dstAlloc, pattern, linearStream, size, rootDeviceEnvironment, COLOR_DEPTH::COLOR_DEPTH_64_BIT_COLOR); + break; + default: + NEO::BlitCommandsHelper::dispatchBlitMemoryFill<16>(dstAlloc, pattern, linearStream, size, rootDeviceEnvironment, COLOR_DEPTH::COLOR_DEPTH_128_BIT_COLOR); + } +} + +template +void BlitCommandsHelper::appendSurfaceType(const BlitProperties &blitProperties, typename GfxFamily::XY_COPY_BLT &blitCmd) { + using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT; + + if (blitProperties.srcAllocation->getDefaultGmm()) { + auto resInfo = blitProperties.srcAllocation->getDefaultGmm()->gmmResourceInfo.get(); + auto resourceType = resInfo->getResourceType(); + auto isArray = resInfo->getArraySize() > 1; + + if (resourceType == GMM_RESOURCE_TYPE::RESOURCE_1D) { + if (isArray) { + blitCmd.setSourceSurfaceType(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D); + } else { + blitCmd.setSourceSurfaceType(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_1D); + } + + } else if (resourceType == GMM_RESOURCE_TYPE::RESOURCE_2D) { + blitCmd.setSourceSurfaceType(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D); + } else if (resourceType == GMM_RESOURCE_TYPE::RESOURCE_3D) { + blitCmd.setSourceSurfaceType(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D); + } + } + + if (blitProperties.dstAllocation->getDefaultGmm()) { + auto resInfo = blitProperties.dstAllocation->getDefaultGmm()->gmmResourceInfo.get(); + auto resourceType = resInfo->getResourceType(); + auto isArray = resInfo->getArraySize() > 1; + + if 
(resourceType == GMM_RESOURCE_TYPE::RESOURCE_1D) { + if (isArray) { + blitCmd.setDestinationSurfaceType(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D); + } else { + blitCmd.setDestinationSurfaceType(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_1D); + } + } else if (resourceType == GMM_RESOURCE_TYPE::RESOURCE_2D) { + blitCmd.setDestinationSurfaceType(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D); + } else if (resourceType == GMM_RESOURCE_TYPE::RESOURCE_3D) { + blitCmd.setDestinationSurfaceType(XY_COPY_BLT::SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D); + } + } +} + +template +void BlitCommandsHelper::appendTilingType(const GMM_TILE_TYPE srcTilingType, const GMM_TILE_TYPE dstTilingType, typename GfxFamily::XY_COPY_BLT &blitCmd) { + using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT; + if (srcTilingType == GMM_TILED_4) { + blitCmd.setSourceTiling(XY_COPY_BLT::TILING::TILING_TILE4); + } else if (srcTilingType == GMM_TILED_64) { + blitCmd.setSourceTiling(XY_COPY_BLT::TILING::TILING_TILE64); + } else { + blitCmd.setSourceTiling(XY_COPY_BLT::TILING::TILING_LINEAR); + } + if (dstTilingType == GMM_TILED_4) { + blitCmd.setDestinationTiling(XY_COPY_BLT::TILING::TILING_TILE4); + } else if (dstTilingType == GMM_TILED_64) { + blitCmd.setDestinationTiling(XY_COPY_BLT::TILING::TILING_TILE64); + } else { + blitCmd.setDestinationTiling(XY_COPY_BLT::TILING::TILING_LINEAR); + } +} + +template +void BlitCommandsHelper::appendColorDepth(const BlitProperties &blitProperties, typename GfxFamily::XY_COPY_BLT &blitCmd) { + using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT; + switch (blitProperties.bytesPerPixel) { + default: + UNRECOVERABLE_IF(true); + case 1: + blitCmd.setColorDepth(XY_COPY_BLT::COLOR_DEPTH::COLOR_DEPTH_8_BIT_COLOR); + break; + case 2: + blitCmd.setColorDepth(XY_COPY_BLT::COLOR_DEPTH::COLOR_DEPTH_16_BIT_COLOR); + break; + case 4: + blitCmd.setColorDepth(XY_COPY_BLT::COLOR_DEPTH::COLOR_DEPTH_32_BIT_COLOR); + break; + case 8: + 
blitCmd.setColorDepth(XY_COPY_BLT::COLOR_DEPTH::COLOR_DEPTH_64_BIT_COLOR); + break; + case 16: + blitCmd.setColorDepth(XY_COPY_BLT::COLOR_DEPTH::COLOR_DEPTH_128_BIT_COLOR); + break; + } +} + +template +void BlitCommandsHelper::getBlitAllocationProperties(const GraphicsAllocation &allocation, uint32_t &pitch, uint32_t &qPitch, GMM_TILE_TYPE &tileType, uint32_t &mipTailLod, uint32_t &compressionDetails, const RootDeviceEnvironment &rootDeviceEnvironment) { + if (allocation.getDefaultGmm()) { + auto gmmResourceInfo = allocation.getDefaultGmm()->gmmResourceInfo.get(); + mipTailLod = gmmResourceInfo->getMipTailStartLodSurfaceState(); + auto resInfo = gmmResourceInfo->getResourceFlags()->Info; + if (resInfo.Tile4) { + tileType = GMM_TILED_4; + } else if (resInfo.Tile64) { + tileType = GMM_TILED_64; + } + + if (!resInfo.Linear) { + qPitch = gmmResourceInfo->getQPitch() ? static_cast(gmmResourceInfo->getQPitch()) : qPitch; + pitch = gmmResourceInfo->getRenderPitch() ? static_cast(gmmResourceInfo->getRenderPitch()) : pitch; + } + + auto gmmClientContext = rootDeviceEnvironment.getGmmClientContext(); + if (resInfo.MediaCompressed) { + compressionDetails = gmmClientContext->getMediaSurfaceStateCompressionFormat(gmmResourceInfo->getResourceFormat()); + } else if (resInfo.RenderCompressed) { + compressionDetails = gmmClientContext->getSurfaceStateCompressionFormat(gmmResourceInfo->getResourceFormat()); + } + } +} + +template +void BlitCommandsHelper::appendBlitCommandsForImages(const BlitProperties &blitProperties, typename GfxFamily::XY_COPY_BLT &blitCmd, const RootDeviceEnvironment &rootDeviceEnvironment, uint32_t &srcSlicePitch, uint32_t &dstSlicePitch) { + auto srcTileType = GMM_NOT_TILED; + auto dstTileType = GMM_NOT_TILED; + auto srcAllocation = blitProperties.srcAllocation; + auto dstAllocation = blitProperties.dstAllocation; + auto srcRowPitch = static_cast(blitProperties.srcRowPitch); + auto dstRowPitch = static_cast(blitProperties.dstRowPitch); + auto srcQPitch = 
static_cast(blitProperties.srcSize.y); + auto dstQPitch = static_cast(blitProperties.dstSize.y); + auto srcMipTailLod = 0u; + auto dstMipTailLod = 0u; + auto srcCompressionFormat = blitCmd.getSourceCompressionFormat(); + auto dstCompressionFormat = blitCmd.getDestinationCompressionFormat(); + + getBlitAllocationProperties(*srcAllocation, srcRowPitch, srcQPitch, srcTileType, srcMipTailLod, srcCompressionFormat, rootDeviceEnvironment); + getBlitAllocationProperties(*dstAllocation, dstRowPitch, dstQPitch, dstTileType, dstMipTailLod, dstCompressionFormat, rootDeviceEnvironment); + + srcSlicePitch = std::max(srcSlicePitch, srcRowPitch * srcQPitch); + dstSlicePitch = std::max(dstSlicePitch, dstRowPitch * dstQPitch); + + blitCmd.setSourcePitch(srcTileType == GMM_NOT_TILED ? srcRowPitch : srcRowPitch / 4); + blitCmd.setDestinationPitch(dstTileType == GMM_NOT_TILED ? dstRowPitch : dstRowPitch / 4); + blitCmd.setSourceSurfaceQpitch(srcQPitch / 4); + blitCmd.setDestinationSurfaceQpitch(dstQPitch / 4); + blitCmd.setSourceMipTailStartLOD(srcMipTailLod); + blitCmd.setDestinationMipTailStartLOD(dstMipTailLod); + blitCmd.setSourceSurfaceWidth(static_cast(blitProperties.srcSize.x)); + blitCmd.setSourceSurfaceHeight(static_cast(blitProperties.srcSize.y)); + blitCmd.setSourceSurfaceDepth(static_cast(blitProperties.srcSize.z)); + blitCmd.setDestinationSurfaceWidth(static_cast(blitProperties.dstSize.x)); + blitCmd.setDestinationSurfaceHeight(static_cast(blitProperties.dstSize.y)); + blitCmd.setDestinationSurfaceDepth(static_cast(blitProperties.dstSize.z)); + blitCmd.setSourceCompressionFormat(srcCompressionFormat); + blitCmd.setDestinationCompressionFormat(dstCompressionFormat); + + appendTilingType(srcTileType, dstTileType, blitCmd); + appendClearColor(blitProperties, blitCmd); +} + +template +void BlitCommandsHelper::appendSliceOffsets(const BlitProperties &blitProperties, typename GfxFamily::XY_COPY_BLT &blitCmd, uint32_t sliceIndex, const RootDeviceEnvironment 
&rootDeviceEnvironment, uint32_t srcSlicePitch, uint32_t dstSlicePitch) { + using XY_COPY_BLT = typename GfxFamily::XY_COPY_BLT; + auto srcAddress = blitProperties.srcGpuAddress; + auto dstAddress = blitProperties.dstGpuAddress; + + if (blitCmd.getSourceTiling() == XY_COPY_BLT::TILING::TILING_LINEAR) { + blitCmd.setSourceBaseAddress(ptrOffset(srcAddress, srcSlicePitch * (sliceIndex + blitProperties.srcOffset.z))); + } else { + blitCmd.setSourceArrayIndex((sliceIndex + static_cast(blitProperties.srcOffset.z)) + 1); + } + if (blitCmd.getDestinationTiling() == XY_COPY_BLT::TILING::TILING_LINEAR) { + blitCmd.setDestinationBaseAddress(ptrOffset(dstAddress, dstSlicePitch * (sliceIndex + blitProperties.dstOffset.z))); + } else { + blitCmd.setDestinationArrayIndex((sliceIndex + static_cast(blitProperties.dstOffset.z)) + 1); + } +} + +template +void BlitCommandsHelper::appendTilingEnable(typename GfxFamily::XY_COLOR_BLT &blitCmd) { + using XY_COLOR_BLT = typename GfxFamily::XY_COLOR_BLT; + blitCmd.setDestinationSurfaceType(XY_COLOR_BLT::DESTINATION_SURFACE_TYPE::DESTINATION_SURFACE_TYPE_2D); +} + +template +void BlitCommandsHelper::programGlobalSequencerFlush(LinearStream &commandStream) { + if (DebugManager.flags.GlobalSequencerFlushOnCopyEngine.get() != 0) { + using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; + constexpr uint32_t globalInvalidationRegister = 0xB404u; + LriHelper::program(&commandStream, globalInvalidationRegister, 1u, false); + EncodeSempahore::addMiSemaphoreWaitCommand(commandStream, + globalInvalidationRegister, + 0u, + MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, + true); + } +} + +template +size_t BlitCommandsHelper::getSizeForGlobalSequencerFlush() { + if (DebugManager.flags.GlobalSequencerFlushOnCopyEngine.get() != 0) { + return sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM) + sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT); + } + return 0u; +} + +template +bool BlitCommandsHelper::miArbCheckWaRequired() { + 
return true; +} + +template +void BlitCommandsHelper::appendClearColor(const BlitProperties &blitProperties, typename GfxFamily::XY_COPY_BLT &blitCmd) { +} + +} // namespace NEO diff --git a/shared/source/helpers/cache_flush_xehp_plus.inl b/shared/source/helpers/cache_flush_xehp_plus.inl new file mode 100644 index 0000000000..e294182e89 --- /dev/null +++ b/shared/source/helpers/cache_flush_xehp_plus.inl @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_stream/linear_stream.h" +#include "shared/source/helpers/l3_range.h" +#include "shared/source/utilities/range.h" + +#include "hw_cmds.h" + +namespace NEO { + +template +inline size_t getSizeNeededToFlushGpuCache(const Range &ranges, bool usePostSync) { + size_t size = sizeof(typename GfxFamily::L3_CONTROL) * (ranges.size() / maxFlushSubrangeCount + 1); + size += ranges.size() * sizeof(typename GfxFamily::L3_FLUSH_ADDRESS_RANGE); + return size; +} +template +inline size_t getSizeNeededForL3Control(const Range &ranges) { + size_t size = sizeof(typename GfxFamily::L3_CONTROL); + size += ranges.size() * sizeof(typename GfxFamily::L3_FLUSH_ADDRESS_RANGE); + return size; +} + +template +inline void flushGpuCache(LinearStream *commandStream, const Range &ranges, uint64_t postSyncAddress, const HardwareInfo &hwInfo) { + using L3_FLUSH_ADDRESS_RANGE = typename GfxFamily::L3_FLUSH_ADDRESS_RANGE; + using L3_CONTROL = typename GfxFamily::L3_CONTROL; + using L3_FLUSH_EVICTION_POLICY = typename GfxFamily::L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY; + + auto l3Control = reinterpret_cast(commandStream->getSpace(getSizeNeededForL3Control(ranges))); + auto cmdL3Control = GfxFamily::cmdInitL3Control; + + uint32_t basel3ControlLength = 3; + uint32_t sizeOfl3FlushAddressRangeInDwords = 2; + uint32_t length = basel3ControlLength + static_cast(ranges.size()) * sizeOfl3FlushAddressRangeInDwords; + cmdL3Control.setLength(length); + 
cmdL3Control.setHdcPipelineFlush(true); + if (postSyncAddress != 0) { + cmdL3Control.setPostSyncOperation(L3_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA); + cmdL3Control.getPostSyncData().setAddress(postSyncAddress); + cmdL3Control.getPostSyncData().setImmediateData(0); + } + *l3Control = cmdL3Control; + + l3Control++; + L3_FLUSH_ADDRESS_RANGE *l3Ranges = reinterpret_cast(l3Control); + L3_FLUSH_ADDRESS_RANGE cmdFlushRange = {}; + for (const L3Range *it = &*ranges.begin(), *end = &*ranges.end(); it != end; ++it, l3Ranges++) { + cmdFlushRange = GfxFamily::cmdInitL3FlushAddressRange; + + cmdFlushRange.setAddress(it->getMaskedAddress()); + cmdFlushRange.setAddressMask(it->getMask()); + cmdFlushRange.setL3FlushEvictionPolicy(static_cast(it->getPolicy())); + *l3Ranges = cmdFlushRange; + } +} +} // namespace NEO diff --git a/shared/source/helpers/definitions/hw_cmds.h b/shared/source/helpers/definitions/hw_cmds.h index 04d4caf136..49d28b68f8 100644 --- a/shared/source/helpers/definitions/hw_cmds.h +++ b/shared/source/helpers/definitions/hw_cmds.h @@ -18,3 +18,6 @@ #ifdef SUPPORT_GEN12LP #include "shared/source/gen12lp/hw_cmds.h" #endif +#ifdef SUPPORT_XE_HP_CORE +#include "shared/source/xe_hp_core/hw_cmds.h" +#endif \ No newline at end of file diff --git a/shared/source/helpers/definitions/pipe_control_args.h b/shared/source/helpers/definitions/pipe_control_args.h index b4d2814bad..91e85398b2 100644 --- a/shared/source/helpers/definitions/pipe_control_args.h +++ b/shared/source/helpers/definitions/pipe_control_args.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Intel Corporation + * Copyright (C) 2020-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -9,8 +9,11 @@ #include "shared/source/helpers/definitions/pipe_control_args_base.h" namespace NEO { +struct HardwareInfo; + struct PipeControlArgs : PipeControlArgsBase { PipeControlArgs() = default; PipeControlArgs(bool dcFlush) : PipeControlArgsBase(dcFlush) {} + void adjustArgs(const HardwareInfo &hwInfo); }; 
} // namespace NEO diff --git a/shared/source/helpers/definitions/pipe_control_args_base.h b/shared/source/helpers/definitions/pipe_control_args_base.h index 1c289f103e..82abaa1dab 100644 --- a/shared/source/helpers/definitions/pipe_control_args_base.h +++ b/shared/source/helpers/definitions/pipe_control_args_base.h @@ -19,6 +19,7 @@ struct PipeControlArgsBase { bool genericMediaStateClear = false; bool hdcPipelineFlush = false; bool tlbInvalidation = false; + bool compressionControlSurfaceCcsFlush = false; bool notifyEnable = false; protected: diff --git a/shared/source/helpers/extra_allocation_data_xehp_plus.inl b/shared/source/helpers/extra_allocation_data_xehp_plus.inl new file mode 100644 index 0000000000..180297cc3f --- /dev/null +++ b/shared/source/helpers/extra_allocation_data_xehp_plus.inl @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/memory_manager/allocation_properties.h" + +namespace NEO { + +template <> +void HwHelperHw::setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const HardwareInfo &hwInfo) const { + if (LocalMemoryAccessMode::CpuAccessDisallowed == getLocalMemoryAccessMode(hwInfo)) { + if (properties.allocationType == GraphicsAllocation::AllocationType::LINEAR_STREAM || + properties.allocationType == GraphicsAllocation::AllocationType::INTERNAL_HEAP || + properties.allocationType == GraphicsAllocation::AllocationType::PRINTF_SURFACE || + properties.allocationType == GraphicsAllocation::AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER || + properties.allocationType == GraphicsAllocation::AllocationType::RING_BUFFER || + properties.allocationType == GraphicsAllocation::AllocationType::SEMAPHORE_BUFFER) { + allocationData.flags.useSystemMemory = true; + } + if (!allocationData.flags.useSystemMemory) { + allocationData.flags.requiresCpuAccess = false; + 
allocationData.storageInfo.isLockable = false; + } + } +} +} // namespace NEO diff --git a/shared/source/helpers/hw_helper_xehp_plus.inl b/shared/source/helpers/hw_helper_xehp_plus.inl new file mode 100644 index 0000000000..bee46219b9 --- /dev/null +++ b/shared/source/helpers/hw_helper_xehp_plus.inl @@ -0,0 +1,204 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/aub/aub_helper.h" +#include "shared/source/command_container/command_encoder.h" +#include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/source/helpers/heap_assigner.h" +#include "shared/source/kernel/grf_config.h" +#include "shared/source/memory_manager/memory_manager.h" + +#include "engine_node.h" +#include "pipe_control_args.h" + +namespace NEO { + +template +void HwHelperHw::adjustDefaultEngineType(HardwareInfo *pHwInfo) { + if (!pHwInfo->featureTable.ftrCCSNode) { + pHwInfo->capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS; + } +} + +template +uint32_t HwHelperHw::getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const { + if (DebugManager.flags.OverrideNumComputeUnitsForScratch.get() != -1) { + return static_cast(DebugManager.flags.OverrideNumComputeUnitsForScratch.get()); + } + + // XeHP plus products return physical threads + return pHwInfo->gtSystemInfo.MaxSubSlicesSupported * pHwInfo->gtSystemInfo.MaxEuPerSubSlice * (pHwInfo->gtSystemInfo.ThreadCount / pHwInfo->gtSystemInfo.EUCount); +} + +template +inline uint32_t HwHelperHw::getGlobalTimeStampBits() const { + return 32; +} + +template +void HwHelperHw::setCapabilityCoherencyFlag(const HardwareInfo *pHwInfo, bool &coherencyFlag) { + coherencyFlag = false; +} + +template +bool HwHelperHw::isLocalMemoryEnabled(const HardwareInfo &hwInfo) const { + return hwInfo.featureTable.ftrLocalMemory; +} + +template +bool HwHelperHw::heapInLocalMem(const HardwareInfo &hwInfo) const { + return !(hwInfo.platform.eProductFamily == IGFX_XE_HP_SDV && 
isWorkaroundRequired(REVISION_A0, REVISION_B, hwInfo)); +} + +template +bool HwHelperHw::hvAlign4Required() const { + return false; +} + +template +bool HwHelperHw::timestampPacketWriteSupported() const { + return true; +} + +template +bool HwHelperHw::obtainBlitterPreference(const HardwareInfo &hwInfo) const { + return true; +} + +template +const HwHelper::EngineInstancesContainer HwHelperHw::getGpgpuEngineInstances(const HardwareInfo &hwInfo) const { + auto defaultEngine = getChosenEngineType(hwInfo); + + EngineInstancesContainer engines; + + if ((DebugManager.flags.NodeOrdinal.get() == static_cast(aub_stream::EngineType::ENGINE_RCS)) || + hwInfo.featureTable.ftrRcsNode) { + engines.push_back({aub_stream::ENGINE_RCS, EngineUsage::Regular}); + } + + engines.push_back({defaultEngine, EngineUsage::LowPriority}); + engines.push_back({defaultEngine, EngineUsage::Internal}); + + if (hwInfo.featureTable.ftrCCSNode) { + for (uint32_t i = 0; i < hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled; i++) { + engines.push_back({static_cast(i + aub_stream::ENGINE_CCS), EngineUsage::Regular}); + } + } + + if (hwInfo.featureTable.ftrBcsInfo.test(0)) { + engines.push_back({aub_stream::ENGINE_BCS, EngineUsage::Regular}); + engines.push_back({aub_stream::ENGINE_BCS, EngineUsage::Internal}); // internal usage + } + + return engines; +}; + +template +EngineGroupType HwHelperHw::getEngineGroupType(aub_stream::EngineType engineType, const HardwareInfo &hwInfo) const { + if (engineType == aub_stream::ENGINE_RCS) { + return EngineGroupType::RenderCompute; + } + if (engineType >= aub_stream::ENGINE_CCS && engineType < (aub_stream::ENGINE_CCS + hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled)) { + return EngineGroupType::Compute; + } + if (engineType == aub_stream::ENGINE_BCS) { + return EngineGroupType::Copy; + } + UNRECOVERABLE_IF(true); +} + +template +uint32_t HwHelperHw::getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const { + if (l3enabled) { + if 
(DebugManager.flags.ForceL1Caching.get() == 0) { + if (l1enabled) { + return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST) >> 1; + } + return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1; + } else { + return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST) >> 1; + } + } + + return gmmHelper.getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1; +} + +template +uint32_t HwHelperHw::calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, + uint32_t threadsPerEu) { + if (grfCount > GrfConfig::DefaultGrfNumber) { + return threadsPerEu / 2u * euCount; + } + return threadsPerEu * euCount; +} + +template +uint64_t HwHelperHw::getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const { + return static_cast((timeStamp & 0xffff'ffff) * frequency); +} + +constexpr uint32_t planarYuvMaxHeight = 16128; + +template +uint32_t HwHelperHw::getPlanarYuvMaxHeight() const { + return planarYuvMaxHeight; +} + +template +aub_stream::MMIOList HwHelperHw::getExtraMmioList(const HardwareInfo &hwInfo, const GmmHelper &gmmHelper) const { + aub_stream::MMIOList mmioList; + + if (DebugManager.flags.EnableStatelessCompressionWithUnifiedMemory.get()) { + auto format = static_cast(DebugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get()); + + UNRECOVERABLE_IF(format > 0x1F); + + uint32_t value = 1; // [0] enable + value |= (format << 3); // [3:7] compression_format + + mmioList.push_back({0x519C, value}); + mmioList.push_back({0xB0F0, value}); + mmioList.push_back({0xE4C0, value}); + } + + return mmioList; +} + +template +bool MemorySynchronizationCommands::isPipeControlWArequired(const HardwareInfo &hwInfo) { + if (DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.get() == 1) { + return hwInfo.featureTable.ftrLocalMemory; + } + return false; +} + +template +inline bool HwHelperHw::preferSmallWorkgroupSizeForKernel(const size_t size, const HardwareInfo &hwInfo) const { + if 
(getSteppingFromHwRevId(hwInfo) >= REVISION_B) { + return false; + } + + auto defaultThreshold = 2048u; + if (DebugManager.flags.OverrideKernelSizeLimitForSmallDispatch.get() != -1) { + defaultThreshold = DebugManager.flags.OverrideKernelSizeLimitForSmallDispatch.get(); + } + + if (size >= defaultThreshold) { + return false; + } + return true; +} + +template +inline uint32_t HwHelperHw::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const { + if (isWorkaroundRequired(REVISION_A0, REVISION_B, hwInfo)) { + return std::min(HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice), 64u); + } + return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice); +} + +} // namespace NEO diff --git a/shared/source/helpers/linux/app_resource_helper.cpp b/shared/source/helpers/linux/app_resource_helper.cpp new file mode 100644 index 0000000000..c68e7aafd6 --- /dev/null +++ b/shared/source/helpers/linux/app_resource_helper.cpp @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/app_resource_helper.h" + +namespace NEO { + +void AppResourceHelper::copyResourceTagStr(char *dst, GraphicsAllocation::AllocationType type, size_t size) {} +const char *AppResourceHelper::getResourceTagStr(GraphicsAllocation::AllocationType type) { return ""; } + +} // namespace NEO diff --git a/shared/source/helpers/pipe_control_args.cpp b/shared/source/helpers/pipe_control_args.cpp new file mode 100644 index 0000000000..497b3fbfaa --- /dev/null +++ b/shared/source/helpers/pipe_control_args.cpp @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "pipe_control_args.h" + +#include "shared/source/helpers/hw_info.h" + +namespace NEO { +void PipeControlArgs::adjustArgs(const HardwareInfo &hwInfo) { +} +} // namespace NEO diff --git a/shared/source/helpers/pipeline_select_helper.h 
b/shared/source/helpers/pipeline_select_helper.h index ca5cfe6220..4b27607bb2 100644 --- a/shared/source/helpers/pipeline_select_helper.h +++ b/shared/source/helpers/pipeline_select_helper.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -12,4 +12,5 @@ namespace NEO { const uint32_t pipelineSelectEnablePipelineSelectMaskBits = 0x3; const uint32_t pipelineSelectMediaSamplerDopClockGateMaskBits = 0x10; const uint32_t pipelineSelectMediaSamplerPowerClockGateMaskBits = 0x40; +const uint32_t pipelineSelectSystolicModeEnableMaskBits = 0x80; } // namespace NEO diff --git a/shared/source/helpers/preamble_xehp_plus.inl b/shared/source/helpers/preamble_xehp_plus.inl new file mode 100644 index 0000000000..39590e1a9a --- /dev/null +++ b/shared/source/helpers/preamble_xehp_plus.inl @@ -0,0 +1,151 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_stream/csr_definitions.h" +#include "shared/source/command_stream/stream_properties.h" +#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/helpers/pipeline_select_helper.h" +#include "shared/source/helpers/preamble_base.inl" + +#include "reg_configs_common.h" + +// L3 programming: +// All L3 Client Pool: 320KB +// URB Pool: 64KB +// Use Full ways: true +// SLM: reserved (always enabled) + +namespace NEO { + +template <> +void PreambleHelper::programPipelineSelect(LinearStream *pCommandStream, + const PipelineSelectArgs &pipelineSelectArgs, + const HardwareInfo &hwInfo) { + + using PIPELINE_SELECT = typename Family::PIPELINE_SELECT; + + PIPELINE_SELECT cmd = Family::cmdInitPipelineSelect; + + if (DebugManager.flags.CleanStateInPreamble.get()) { + auto pCmd = pCommandStream->getSpaceForCmd(); + cmd.setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_3D); + *pCmd = cmd; + + 
auto pipeControl = Family::cmdInitPipeControl; + pipeControl.setStateCacheInvalidationEnable(true); + auto pipeControlBuffer = pCommandStream->getSpaceForCmd(); + *pipeControlBuffer = pipeControl; + } + + auto pCmd = pCommandStream->getSpaceForCmd(); + + auto mask = pipelineSelectEnablePipelineSelectMaskBits | + pipelineSelectMediaSamplerDopClockGateMaskBits | + pipelineSelectSystolicModeEnableMaskBits; + + cmd.setMaskBits(mask); + cmd.setPipelineSelection(PIPELINE_SELECT::PIPELINE_SELECTION_GPGPU); + cmd.setMediaSamplerDopClockGateEnable(!pipelineSelectArgs.mediaSamplerRequired); + cmd.setSystolicModeEnable(pipelineSelectArgs.specialPipelineSelectMode); + + if (DebugManager.flags.OverrideSystolicPipelineSelect.get() != -1) { + cmd.setSystolicModeEnable(DebugManager.flags.OverrideSystolicPipelineSelect.get()); + } + + *pCmd = cmd; + + if (DebugManager.flags.CleanStateInPreamble.get()) { + auto pipeControl = Family::cmdInitPipeControl; + pipeControl.setStateCacheInvalidationEnable(true); + auto pipeControlBuffer = pCommandStream->getSpaceForCmd(); + *pipeControlBuffer = pipeControl; + } +} + +template <> +void PreambleHelper::addPipeControlBeforeVfeCmd(LinearStream *pCommandStream, const HardwareInfo *hwInfo, EngineGroupType engineGroupType) { +} + +template <> +void PreambleHelper::programL3(LinearStream *pCommandStream, uint32_t l3Config) { +} + +template <> +uint32_t PreambleHelper::getUrbEntryAllocationSize() { + return 0u; +} +template <> +void PreambleHelper::appendProgramVFEState(const HardwareInfo &hwInfo, const StreamProperties &streamProperties, uint32_t additionalKernelExecInfo, void *cmd); + +template <> +void *PreambleHelper::getSpaceForVfeState(LinearStream *pCommandStream, + const HardwareInfo &hwInfo, + EngineGroupType engineGroupType) { + using CFE_STATE = typename Family::CFE_STATE; + return pCommandStream->getSpace(sizeof(CFE_STATE)); +} + +template <> +void PreambleHelper::programVfeState(void *pVfeState, + const HardwareInfo &hwInfo, + uint32_t 
scratchSize, + uint64_t scratchAddress, + uint32_t maxFrontEndThreads, + uint32_t additionalKernelExecInfo, + const StreamProperties &streamProperties) { + using CFE_STATE = typename Family::CFE_STATE; + + auto cfeState = reinterpret_cast(pVfeState); + CFE_STATE cmd = Family::cmdInitCfeState; + + cmd.setNumberOfWalkers(1); + + uint32_t lowAddress = uint32_t(0xFFFFFFFF & scratchAddress); + cmd.setScratchSpaceBuffer(lowAddress); + cmd.setMaximumNumberOfThreads(maxFrontEndThreads); + appendProgramVFEState(hwInfo, streamProperties, additionalKernelExecInfo, &cmd); + + if (DebugManager.flags.CFENumberOfWalkers.get() != -1) { + cmd.setNumberOfWalkers(DebugManager.flags.CFENumberOfWalkers.get()); + } + if (DebugManager.flags.CFEMaximumNumberOfThreads.get() != -1) { + cmd.setMaximumNumberOfThreads(DebugManager.flags.CFEMaximumNumberOfThreads.get()); + } + if (DebugManager.flags.CFEOverDispatchControl.get() != -1) { + cmd.setOverDispatchControl(static_cast(DebugManager.flags.CFEOverDispatchControl.get())); + } + if (DebugManager.flags.CFEFusedEUDispatch.get() != -1) { + cmd.setFusedEuDispatch(DebugManager.flags.CFEFusedEUDispatch.get()); + } + if (DebugManager.flags.CFELargeGRFThreadAdjustDisable.get() != -1) { + cmd.setLargeGRFThreadAdjustDisable(DebugManager.flags.CFELargeGRFThreadAdjustDisable.get()); + } + + *cfeState = cmd; +} + +template <> +uint64_t PreambleHelper::getScratchSpaceAddressOffsetForVfeState(LinearStream *pCommandStream, void *pVfeState) { + return 0; +} + +template <> +size_t PreambleHelper::getVFECommandsSize() { + using CFE_STATE = typename Family::CFE_STATE; + return sizeof(CFE_STATE); +} + +template <> +uint32_t PreambleHelper::getL3Config(const HardwareInfo &hwInfo, bool useSLM) { + return 0u; +} + +template <> +const uint32_t L3CNTLRegisterOffset::registerOffset = std::numeric_limits::max(); + +} // namespace NEO diff --git a/shared/source/helpers/state_base_address_xehp_plus.inl b/shared/source/helpers/state_base_address_xehp_plus.inl new file 
mode 100644 index 0000000000..ebf3973937 --- /dev/null +++ b/shared/source/helpers/state_base_address_xehp_plus.inl @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_stream/csr_definitions.h" +#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/helpers/api_specific_config.h" +#include "shared/source/helpers/state_base_address_base.inl" + +#include "gmm_client_context.h" + +namespace NEO { + +template +void setSbaStatelessCompressionParams(typename GfxFamily::STATE_BASE_ADDRESS *stateBaseAddress, MemoryCompressionState memoryCompressionState) { + using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; + + if (memoryCompressionState == MemoryCompressionState::Enabled) { + stateBaseAddress->setEnableMemoryCompressionForAllStatelessAccesses(STATE_BASE_ADDRESS::ENABLE_MEMORY_COMPRESSION_FOR_ALL_STATELESS_ACCESSES_ENABLED); + } else { + stateBaseAddress->setEnableMemoryCompressionForAllStatelessAccesses(STATE_BASE_ADDRESS::ENABLE_MEMORY_COMPRESSION_FOR_ALL_STATELESS_ACCESSES_DISABLED); + } +} + +template +void StateBaseAddressHelper::appendStateBaseAddressParameters( + STATE_BASE_ADDRESS *stateBaseAddress, + const IndirectHeap *ssh, + bool setGeneralStateBaseAddress, + uint64_t internalHeapBase, + GmmHelper *gmmHelper, + bool isMultiOsContextCapable, + MemoryCompressionState memoryCompressionState, + bool overrideBindlessSurfaceStateBase, + bool useGlobalAtomics, + bool areMultipleSubDevicesInContext) { + using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; + using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; + + if (setGeneralStateBaseAddress && is64bit) { + stateBaseAddress->setGeneralStateBaseAddress(GmmHelper::decanonize(internalHeapBase)); + } + + if (overrideBindlessSurfaceStateBase && ssh) { + stateBaseAddress->setBindlessSurfaceStateBaseAddress(ssh->getHeapGpuBase()); + 
stateBaseAddress->setBindlessSurfaceStateBaseAddressModifyEnable(true); + const auto surfaceStateCount = ssh->getMaxAvailableSpace() / sizeof(RENDER_SURFACE_STATE); + stateBaseAddress->setBindlessSurfaceStateSize(static_cast(surfaceStateCount - 1)); + } + + stateBaseAddress->setBindlessSamplerStateBaseAddressModifyEnable(true); + + auto l3CacheOnPolicy = GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER; + auto l1L3CacheOnPolicy = GMM_RESOURCE_USAGE_OCL_INLINE_CONST_HDC; + + if (DebugManager.flags.DisableCachingForHeaps.get()) { + l3CacheOnPolicy = GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED; + l1L3CacheOnPolicy = GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED; + stateBaseAddress->setInstructionMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED)); + } + + stateBaseAddress->setIndirectObjectMemoryObjectControlStateIndexToMocsTables(gmmHelper->getMOCS(l1L3CacheOnPolicy)); + stateBaseAddress->setSurfaceStateMemoryObjectControlStateIndexToMocsTables(gmmHelper->getMOCS(l3CacheOnPolicy)); + stateBaseAddress->setDynamicStateMemoryObjectControlStateIndexToMocsTables(gmmHelper->getMOCS(l3CacheOnPolicy)); + stateBaseAddress->setGeneralStateMemoryObjectControlStateIndexToMocsTables(gmmHelper->getMOCS(l3CacheOnPolicy)); + stateBaseAddress->setBindlessSurfaceStateMemoryObjectControlStateIndexToMocsTables(gmmHelper->getMOCS(l3CacheOnPolicy)); + + bool enableMultiGpuAtomics = isMultiOsContextCapable; + if (DebugManager.flags.EnableMultiGpuAtomicsOptimization.get()) { + enableMultiGpuAtomics = useGlobalAtomics && (isMultiOsContextCapable || areMultipleSubDevicesInContext); + } + stateBaseAddress->setDisableSupportForMultiGpuAtomicsForStatelessAccesses(!enableMultiGpuAtomics); + + stateBaseAddress->setDisableSupportForMultiGpuPartialWritesForStatelessMessages(!isMultiOsContextCapable); + + if (DebugManager.flags.ForceMultiGpuAtomics.get() != -1) { + 
stateBaseAddress->setDisableSupportForMultiGpuAtomicsForStatelessAccesses(!!DebugManager.flags.ForceMultiGpuAtomics.get()); + } + + if (DebugManager.flags.ForceMultiGpuPartialWrites.get() != -1) { + stateBaseAddress->setDisableSupportForMultiGpuPartialWritesForStatelessMessages(!!DebugManager.flags.ForceMultiGpuPartialWrites.get()); + } + + if (memoryCompressionState != MemoryCompressionState::NotApplicable) { + setSbaStatelessCompressionParams(stateBaseAddress, memoryCompressionState); + } + + int32_t cachingPolicySetting = DebugManager.flags.UseCachingPolicyForIndirectObjectHeap.get(); + uint32_t indirectObjectHeapCachingPolicy = l1L3CacheOnPolicy; + + if (cachingPolicySetting != -1) { + if (cachingPolicySetting == 0) { + indirectObjectHeapCachingPolicy = GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED; + } else if (cachingPolicySetting == 1) { + indirectObjectHeapCachingPolicy = GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER; + } + } + stateBaseAddress->setIndirectObjectMemoryObjectControlStateIndexToMocsTables(gmmHelper->getMOCS(indirectObjectHeapCachingPolicy)); + + if (stateBaseAddress->getStatelessDataPortAccessMemoryObjectControlState() == gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) && DebugManager.flags.ForceL1Caching.get() != 0) { + stateBaseAddress->setStatelessDataPortAccessMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST)); + } + + appendExtraCacheSettings(stateBaseAddress, gmmHelper); +} + +template +void StateBaseAddressHelper::programBindingTableBaseAddress(LinearStream &commandStream, const IndirectHeap &ssh, GmmHelper *gmmHelper) { + using _3DSTATE_BINDING_TABLE_POOL_ALLOC = typename GfxFamily::_3DSTATE_BINDING_TABLE_POOL_ALLOC; + + auto bindingTablePoolAlloc = commandStream.getSpaceForCmd<_3DSTATE_BINDING_TABLE_POOL_ALLOC>(); + _3DSTATE_BINDING_TABLE_POOL_ALLOC cmd = GfxFamily::cmdInitStateBindingTablePoolAlloc; + cmd.setBindingTablePoolBaseAddress(ssh.getHeapGpuBase()); + 
cmd.setBindingTablePoolBufferSize(ssh.getHeapSizeInPages()); + cmd.setSurfaceObjectControlStateIndexToMocsTables(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_STATE_HEAP_BUFFER)); + if (DebugManager.flags.DisableCachingForHeaps.get()) { + cmd.setSurfaceObjectControlStateIndexToMocsTables(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER_CACHELINE_MISALIGNED)); + } + + *bindingTablePoolAlloc = cmd; +} + +} // namespace NEO diff --git a/shared/source/helpers/windows/app_resource_helper.cpp b/shared/source/helpers/windows/app_resource_helper.cpp new file mode 100644 index 0000000000..097f07f455 --- /dev/null +++ b/shared/source/helpers/windows/app_resource_helper.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/app_resource_helper.h" + +#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/helpers/app_resource_defines.h" +#include "shared/source/helpers/string.h" + +namespace NEO { + +void AppResourceHelper::copyResourceTagStr(char *dst, GraphicsAllocation::AllocationType type, size_t size) { + if constexpr (AppResourceDefines::resourceTagSupport) { + if (DebugManager.flags.EnableResourceTags.get()) { + auto tag = getResourceTagStr(type); + strcpy_s(dst, size, tag); + } + } +} + +const char *AppResourceHelper::getResourceTagStr(GraphicsAllocation::AllocationType type) { + switch (type) { + case GraphicsAllocation::AllocationType::UNKNOWN: + return "UNKNOWN"; + case GraphicsAllocation::AllocationType::BUFFER: + return "BUFFER"; + case GraphicsAllocation::AllocationType::BUFFER_COMPRESSED: + return "BFCMPRSD"; + case GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY: + return "BFHSTMEM"; + case GraphicsAllocation::AllocationType::COMMAND_BUFFER: + return "CMNDBUFF"; + case GraphicsAllocation::AllocationType::CONSTANT_SURFACE: + return "CSNTSRFC"; + case GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER: + return 
"DEVQUEBF"; + case GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR: + return "EXHSTPTR"; + case GraphicsAllocation::AllocationType::FILL_PATTERN: + return "FILPATRN"; + case GraphicsAllocation::AllocationType::GLOBAL_SURFACE: + return "GLBLSRFC"; + case GraphicsAllocation::AllocationType::IMAGE: + return "IMAGE"; + case GraphicsAllocation::AllocationType::INDIRECT_OBJECT_HEAP: + return "INOBHEAP"; + case GraphicsAllocation::AllocationType::INSTRUCTION_HEAP: + return "INSTHEAP"; + case GraphicsAllocation::AllocationType::INTERNAL_HEAP: + return "INTLHEAP"; + case GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY: + return "INHSTMEM"; + case GraphicsAllocation::AllocationType::KERNEL_ISA: + return "KERNLISA"; + case GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL: + return "KRLISAIN"; + case GraphicsAllocation::AllocationType::LINEAR_STREAM: + return "LINRSTRM"; + case GraphicsAllocation::AllocationType::MAP_ALLOCATION: + return "MAPALLOC"; + case GraphicsAllocation::AllocationType::MCS: + return "MCS"; + case GraphicsAllocation::AllocationType::PIPE: + return "PIPE"; + case GraphicsAllocation::AllocationType::PREEMPTION: + return "PRMPTION"; + case GraphicsAllocation::AllocationType::PRINTF_SURFACE: + return "PRNTSRFC"; + case GraphicsAllocation::AllocationType::PRIVATE_SURFACE: + return "PRVTSRFC"; + case GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER: + return "PROFTGBF"; + case GraphicsAllocation::AllocationType::SCRATCH_SURFACE: + return "SCRHSRFC"; + case GraphicsAllocation::AllocationType::SHARED_BUFFER: + return "SHRDBUFF"; + case GraphicsAllocation::AllocationType::SHARED_CONTEXT_IMAGE: + return "SRDCXIMG"; + case GraphicsAllocation::AllocationType::SHARED_IMAGE: + return "SHERDIMG"; + case GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY: + return "SRDRSCCP"; + case GraphicsAllocation::AllocationType::SURFACE_STATE_HEAP: + return "SRFCSTHP"; + case GraphicsAllocation::AllocationType::SVM_CPU: + return "SVM_CPU"; + case 
GraphicsAllocation::AllocationType::SVM_GPU: + return "SVM_GPU"; + case GraphicsAllocation::AllocationType::SVM_ZERO_COPY: + return "SVM0COPY"; + case GraphicsAllocation::AllocationType::TAG_BUFFER: + return "TAGBUFER"; + case GraphicsAllocation::AllocationType::GLOBAL_FENCE: + return "GLBLFENC"; + case GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER: + return "TSPKTGBF"; + case GraphicsAllocation::AllocationType::WRITE_COMBINED: + return "WRTCMBND"; + case GraphicsAllocation::AllocationType::RING_BUFFER: + return "RINGBUFF"; + case GraphicsAllocation::AllocationType::SEMAPHORE_BUFFER: + return "SMPHRBUF"; + case GraphicsAllocation::AllocationType::DEBUG_CONTEXT_SAVE_AREA: + return "DBCXSVAR"; + case GraphicsAllocation::AllocationType::DEBUG_SBA_TRACKING_BUFFER: + return "DBSBATRB"; + case GraphicsAllocation::AllocationType::DEBUG_MODULE_AREA: + return "DBMDLARE"; + case GraphicsAllocation::AllocationType::UNIFIED_SHARED_MEMORY: + return "USHRDMEM"; + case GraphicsAllocation::AllocationType::WORK_PARTITION_SURFACE: + return "WRPRTSRF"; + case GraphicsAllocation::AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER: + return "GPUTSDBF"; + default: + return "NOTFOUND"; + } +} + +} // namespace NEO diff --git a/shared/source/kernel/CMakeLists.txt b/shared/source/kernel/CMakeLists.txt index 64c4077986..b3f1d85e74 100644 --- a/shared/source/kernel/CMakeLists.txt +++ b/shared/source/kernel/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2019-2020 Intel Corporation +# Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -17,6 +17,7 @@ set(NEO_CORE_KERNEL ${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor.h ${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.cpp ${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.h + ${CMAKE_CURRENT_SOURCE_DIR}/kernel_properties.h ${CMAKE_CURRENT_SOURCE_DIR}/read_extended_info.h ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/read_extended_info.cpp ) diff --git 
a/shared/source/kernel/grf_config.h b/shared/source/kernel/grf_config.h index 3f6e3643f3..84023cbac0 100644 --- a/shared/source/kernel/grf_config.h +++ b/shared/source/kernel/grf_config.h @@ -11,5 +11,6 @@ namespace GrfConfig { constexpr uint32_t DefaultGrfNumber = 128u; +constexpr uint32_t LargeGrfNumber = 256u; constexpr uint32_t NotApplicable = 0u; } // namespace GrfConfig diff --git a/shared/source/kernel/kernel_descriptor.h b/shared/source/kernel/kernel_descriptor.h index ae66d06fa7..38a26773f4 100644 --- a/shared/source/kernel/kernel_descriptor.h +++ b/shared/source/kernel/kernel_descriptor.h @@ -28,6 +28,7 @@ using InstructionsSegmentOffset = uint16_t; struct ExtendedInfoBase { virtual ~ExtendedInfoBase() = default; + virtual bool specialPipelineSelectModeRequired() const { return false; } }; struct KernelDescriptor final { diff --git a/shared/source/kernel/kernel_properties.h b/shared/source/kernel/kernel_properties.h new file mode 100644 index 0000000000..751f3f5e53 --- /dev/null +++ b/shared/source/kernel/kernel_properties.h @@ -0,0 +1,12 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include + +constexpr uint64_t FP_ATOMIC_EXT_FLAG_GLOBAL_ADD = 1 << 1; // Supports atomic add and subtract diff --git a/shared/source/memory_manager/CMakeLists.txt b/shared/source/memory_manager/CMakeLists.txt index eaa438faf1..87cab046a6 100644 --- a/shared/source/memory_manager/CMakeLists.txt +++ b/shared/source/memory_manager/CMakeLists.txt @@ -19,8 +19,8 @@ set(NEO_CORE_MEMORY_MANAGER ${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/deferred_deleter.h ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/engine_limits.h - ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/storage_info.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/storage_info.h + ${CMAKE_CURRENT_SOURCE_DIR}/definitions/storage_info.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/definitions/storage_info.h ${CMAKE_CURRENT_SOURCE_DIR}/eviction_status.h ${CMAKE_CURRENT_SOURCE_DIR}/gfx_partition.cpp ${CMAKE_CURRENT_SOURCE_DIR}/${BRANCH_DIR_SUFFIX}/gfx_partition_init_additional_range.cpp diff --git a/shared/source/memory_manager/definitions/storage_info.cpp b/shared/source/memory_manager/definitions/storage_info.cpp index 3214249df5..e21e3eb7ec 100644 --- a/shared/source/memory_manager/definitions/storage_info.cpp +++ b/shared/source/memory_manager/definitions/storage_info.cpp @@ -5,11 +5,135 @@ * */ +#include "shared/source/execution_environment/root_device_environment.h" +#include "shared/source/helpers/app_resource_helper.h" +#include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/memory_manager.h" +#include + namespace NEO { StorageInfo MemoryManager::createStorageInfoFromProperties(const AllocationProperties &properties) { - return {}; + if (properties.subDevicesBitfield.count() == 0) { + return {}; + } + + const auto deviceCount = HwHelper::getSubDevicesCount(executionEnvironment.rootDeviceEnvironments[properties.rootDeviceIndex]->getHardwareInfo()); + const auto leastOccupiedBank = localMemoryUsageBankSelector[properties.rootDeviceIndex]->getLeastOccupiedBank(properties.subDevicesBitfield); + const auto subDevicesMask = executionEnvironment.rootDeviceEnvironments[properties.rootDeviceIndex]->deviceAffinityMask.getGenericSubDevicesMask().to_ulong(); + + const DeviceBitfield allTilesValue(properties.subDevicesBitfield.count() == 1 + ? 
maxNBitValue(deviceCount) & subDevicesMask + : properties.subDevicesBitfield); + DeviceBitfield preferredTile; + if (properties.subDevicesBitfield.count() == 1) { + preferredTile = properties.subDevicesBitfield; + } else { + UNRECOVERABLE_IF(!properties.subDevicesBitfield.test(leastOccupiedBank)); + preferredTile.set(leastOccupiedBank); + } + + StorageInfo storageInfo{preferredTile, allTilesValue}; + storageInfo.isLockable = GraphicsAllocation::isLockable(properties.allocationType); + storageInfo.cpuVisibleSegment = GraphicsAllocation::isCpuAccessRequired(properties.allocationType); + + AppResourceHelper::copyResourceTagStr(storageInfo.resourceTag, properties.allocationType, + sizeof(storageInfo.resourceTag)); + + switch (properties.allocationType) { + case GraphicsAllocation::AllocationType::KERNEL_ISA: + case GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL: + case GraphicsAllocation::AllocationType::DEBUG_MODULE_AREA: { + auto placeIsaOnMultiTile = true; + + if (executionEnvironment.isDebuggingEnabled() && + executionEnvironment.rootDeviceEnvironments[properties.rootDeviceIndex]->debugger.get()) { + placeIsaOnMultiTile = false; + } + + if (DebugManager.flags.MultiTileIsaPlacement.get() != -1) { + placeIsaOnMultiTile = !!DebugManager.flags.MultiTileIsaPlacement.get(); + } + if (placeIsaOnMultiTile) { + storageInfo.cloningOfPageTables = false; + storageInfo.memoryBanks = allTilesValue; + storageInfo.tileInstanced = true; + } else { + storageInfo.cloningOfPageTables = true; + storageInfo.memoryBanks = 0x1; + storageInfo.tileInstanced = false; + } + } break; + case GraphicsAllocation::AllocationType::DEBUG_CONTEXT_SAVE_AREA: + case GraphicsAllocation::AllocationType::PRIVATE_SURFACE: + case GraphicsAllocation::AllocationType::WORK_PARTITION_SURFACE: + storageInfo.cloningOfPageTables = false; + storageInfo.memoryBanks = allTilesValue; + storageInfo.tileInstanced = true; + break; + case GraphicsAllocation::AllocationType::COMMAND_BUFFER: + case 
GraphicsAllocation::AllocationType::INTERNAL_HEAP: + case GraphicsAllocation::AllocationType::LINEAR_STREAM: + storageInfo.cloningOfPageTables = properties.flags.multiOsContextCapable; + storageInfo.memoryBanks = preferredTile; + if (!properties.flags.multiOsContextCapable) { + storageInfo.pageTablesVisibility = preferredTile; + } + break; + case GraphicsAllocation::AllocationType::SCRATCH_SURFACE: + case GraphicsAllocation::AllocationType::PREEMPTION: + if (properties.flags.multiOsContextCapable) { + storageInfo.cloningOfPageTables = false; + storageInfo.memoryBanks = allTilesValue; + storageInfo.tileInstanced = true; + } else { + storageInfo.memoryBanks = preferredTile; + storageInfo.pageTablesVisibility = preferredTile; + } + break; + case GraphicsAllocation::AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER: + if (properties.flags.multiOsContextCapable) { + storageInfo.memoryBanks = allTilesValue; + storageInfo.cloningOfPageTables = true; + } else { + storageInfo.memoryBanks = preferredTile; + storageInfo.pageTablesVisibility = preferredTile; + storageInfo.cloningOfPageTables = false; + } + break; + case GraphicsAllocation::AllocationType::BUFFER: + case GraphicsAllocation::AllocationType::BUFFER_COMPRESSED: + case GraphicsAllocation::AllocationType::SVM_GPU: + if (this->supportsMultiStorageResources && + properties.multiStorageResource && + properties.size >= deviceCount * MemoryConstants::pageSize64k && + properties.subDevicesBitfield.count() != 1u) { + storageInfo.memoryBanks = allTilesValue; + storageInfo.multiStorage = true; + if (DebugManager.flags.OverrideMultiStoragePlacement.get() != -1) { + storageInfo.memoryBanks = DebugManager.flags.OverrideMultiStoragePlacement.get(); + } + } + if (properties.flags.readOnlyMultiStorage) { + storageInfo.readOnlyMultiStorage = true; + storageInfo.cloningOfPageTables = false; + storageInfo.memoryBanks = allTilesValue; + storageInfo.tileInstanced = true; + } + storageInfo.localOnlyRequired = true; + break; + case 
GraphicsAllocation::AllocationType::UNIFIED_SHARED_MEMORY: + storageInfo.memoryBanks = allTilesValue; + break; + default: + break; + } + return storageInfo; +} +uint32_t StorageInfo::getNumBanks() const { + if (memoryBanks == 0) { + return 1u; + } + return static_cast(memoryBanks.count()); } -uint32_t StorageInfo::getNumBanks() const { return 1u; } } // namespace NEO diff --git a/shared/source/memory_manager/definitions/storage_info.h b/shared/source/memory_manager/definitions/storage_info.h index 9c642fcac5..24c64ac6be 100644 --- a/shared/source/memory_manager/definitions/storage_info.h +++ b/shared/source/memory_manager/definitions/storage_info.h @@ -6,12 +6,27 @@ */ #pragma once +#include "shared/source/helpers/app_resource_defines.h" +#include "shared/source/helpers/common_types.h" + #include + namespace NEO { struct StorageInfo { + StorageInfo() = default; + StorageInfo(DeviceBitfield memoryBanks, DeviceBitfield pageTablesVisibility) + : memoryBanks(memoryBanks), pageTablesVisibility(pageTablesVisibility){}; uint32_t getNumBanks() const; - uint32_t getMemoryBanks() const { return 0u; } + DeviceBitfield memoryBanks; + DeviceBitfield pageTablesVisibility; + bool cloningOfPageTables = true; + bool tileInstanced = false; bool multiStorage = false; + bool readOnlyMultiStorage = false; + bool cpuVisibleSegment = false; bool isLockable = false; + bool localOnlyRequired = false; + char resourceTag[AppResourceDefines::maxStrLen + 1] = ""; + uint32_t getMemoryBanks() const { return static_cast(memoryBanks.to_ulong()); } }; } // namespace NEO diff --git a/shared/source/memory_manager/graphics_allocation.h b/shared/source/memory_manager/graphics_allocation.h index aded071d9c..66ee35d48a 100644 --- a/shared/source/memory_manager/graphics_allocation.h +++ b/shared/source/memory_manager/graphics_allocation.h @@ -10,13 +10,13 @@ #include "shared/source/helpers/constants.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/ptr_math.h" +#include 
"shared/source/memory_manager/definitions/storage_info.h" #include "shared/source/memory_manager/host_ptr_defines.h" #include "shared/source/memory_manager/memory_pool.h" #include "shared/source/utilities/idlist.h" #include "shared/source/utilities/stackvec.h" #include "engine_limits.h" -#include "storage_info.h" #include #include diff --git a/shared/source/memory_manager/memory_banks.h b/shared/source/memory_manager/memory_banks.h index bb61706eaa..c24f9aabb1 100644 --- a/shared/source/memory_manager/memory_banks.h +++ b/shared/source/memory_manager/memory_banks.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -11,9 +11,12 @@ namespace MemoryBanks { constexpr uint32_t BankNotSpecified{0}; constexpr uint32_t MainBank{0}; -constexpr uint32_t Bank0{1}; inline uint32_t getBank(uint32_t deviceOrdinal) { return MemoryBanks::MainBank; } + +inline uint32_t getBankForLocalMemory(uint32_t deviceOrdinal) { + return deviceOrdinal + 1; +} } // namespace MemoryBanks diff --git a/shared/source/memory_manager/physical_address_allocator.h b/shared/source/memory_manager/physical_address_allocator.h index da41ec8599..f6113f40cf 100644 --- a/shared/source/memory_manager/physical_address_allocator.h +++ b/shared/source/memory_manager/physical_address_allocator.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -49,4 +49,58 @@ class PhysicalAddressAllocator { const uint64_t initialPageAddress = 0x1000; }; +template +class PhysicalAddressAllocatorHw : public PhysicalAddressAllocator { + + public: + PhysicalAddressAllocatorHw(uint64_t bankSize, uint32_t numOfBanks) : memoryBankSize(bankSize), numberOfBanks(numOfBanks) { + if (numberOfBanks > 0) { + bankAllocators = new std::atomic[numberOfBanks]; + bankAllocators[0].store(initialPageAddress); + + for (uint32_t i = 1; i < numberOfBanks; 
i++) { + bankAllocators[i].store(i * memoryBankSize); + } + } + } + + ~PhysicalAddressAllocatorHw() override { + if (bankAllocators) { + delete[] bankAllocators; // array form: storage comes from new[] in the constructor + } + } + + uint64_t reservePage(uint32_t memoryBank, size_t pageSize, size_t alignement) override { + std::unique_lock lock(pageReserveMutex); + + if (memoryBank == MemoryBanks::MainBank || numberOfBanks == 0) { + auto currentAddress = mainAllocator.load(); + auto alignmentSize = alignUp(currentAddress, alignement) - currentAddress; + mainAllocator += alignmentSize; + return mainAllocator.fetch_add(pageSize); + } + UNRECOVERABLE_IF(memoryBank > numberOfBanks); + + auto index = memoryBank - MemoryBanks::getBankForLocalMemory(0); + + auto currentAddress = bankAllocators[index].load(); + auto alignmentSize = alignUp(currentAddress, alignement) - currentAddress; + bankAllocators[index] += alignmentSize; + + auto address = bankAllocators[index].fetch_add(pageSize); + + UNRECOVERABLE_IF(address > ((index + 1) * memoryBankSize)); + + return address; + } + + uint64_t getBankSize() { return memoryBankSize; } + uint32_t getNumberOfBanks() { return numberOfBanks; } + + protected: + std::atomic *bankAllocators = nullptr; + uint64_t memoryBankSize = 0; + uint32_t numberOfBanks = 0; +}; + } // namespace NEO diff --git a/shared/source/os_interface/CMakeLists.txt b/shared/source/os_interface/CMakeLists.txt index 1a78da8f87..3c107bcbf4 100644 --- a/shared/source/os_interface/CMakeLists.txt +++ b/shared/source/os_interface/CMakeLists.txt @@ -39,5 +39,9 @@ set(NEO_CORE_OS_INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/os_inc_base.h ) +if(SUPPORT_XEHP_PLUS) + list(APPEND NEO_CORE_OS_INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_xehp_plus.inl) +endif() + set_property(GLOBAL PROPERTY NEO_CORE_OS_INTERFACE ${NEO_CORE_OS_INTERFACE}) add_subdirectories() diff --git a/shared/source/os_interface/hw_info_config_xehp_plus.inl b/shared/source/os_interface/hw_info_config_xehp_plus.inl new file mode 100644 index 0000000000..76bb6b6193 ---
/dev/null +++ b/shared/source/os_interface/hw_info_config_xehp_plus.inl @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/os_interface/hw_info_config.h" +namespace NEO { + +template +uint64_t HwInfoConfigHw::getHostMemCapabilitiesValue() { + return (UNIFIED_SHARED_MEMORY_ACCESS); +} + +template +uint64_t HwInfoConfigHw::getCrossDeviceSharedMemCapabilities() { + return (UNIFIED_SHARED_MEMORY_ACCESS | UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS); +} + +template +void HwInfoConfigHw::enableRenderCompression(HardwareInfo *hwInfo) { + hwInfo->capabilityTable.ftrRenderCompressedImages = hwInfo->featureTable.ftrE2ECompression; + hwInfo->capabilityTable.ftrRenderCompressedBuffers = hwInfo->featureTable.ftrE2ECompression; +} + +} // namespace NEO diff --git a/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp b/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp index ae2473c268..0d00badf51 100644 --- a/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager_allocate_in_device_pool_dg1.cpp @@ -354,7 +354,7 @@ uint64_t DrmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex, uint32_t if (!memoryInfo) { return 0; } - return memoryInfo->getMemoryRegionSize(MemoryBanks::Bank0); + return memoryInfo->getMemoryRegionSize(MemoryBanks::getBankForLocalMemory(0)); } } // namespace NEO diff --git a/shared/source/sku_info/sku_info_base.h b/shared/source/sku_info/sku_info_base.h index 95e94fb4dc..2b03902672 100644 --- a/shared/source/sku_info/sku_info_base.h +++ b/shared/source/sku_info/sku_info_base.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -101,6 +101,9 @@ struct FeatureTableBase { bool ftrRcsNode : 1; bool ftrLocalMemory : 1; bool 
ftrLocalMemoryAllows4KB : 1; + bool ftrFlatPhysCCS : 1; + bool ftrMultiTileArch : 1; + bool ftrCCSMultiInstance : 1; }; uint64_t packed[2]; }; diff --git a/shared/source/xe_hp_core/aub_mapper.h b/shared/source/xe_hp_core/aub_mapper.h new file mode 100644 index 0000000000..cc45bdcc47 --- /dev/null +++ b/shared/source/xe_hp_core/aub_mapper.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/aub/aub_mapper_base.h" +#include "shared/source/helpers/constants.h" + +#include "engine_node.h" + +namespace NEO { +struct XeHpFamily; + +template <> +struct AUBFamilyMapper { + enum { device = AubMemDump::DeviceValues::XeHP_SDV }; + + using AubTraits = AubMemDump::Traits; + + static const AubMemDump::LrcaHelper *const csTraits[aub_stream::NUM_ENGINES]; + + static const MMIOList globalMMIO; + static const MMIOList *perEngineMMIO[aub_stream::NUM_ENGINES]; + + typedef AubMemDump::AubDump AUB; +}; +} // namespace NEO diff --git a/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp b/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp new file mode 100644 index 0000000000..18057cb14d --- /dev/null +++ b/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_container/command_encoder.h" +#include "shared/source/command_container/command_encoder.inl" +#include "shared/source/command_container/command_encoder_xehp_plus.inl" +#include "shared/source/command_container/encode_compute_mode_tgllp_plus.inl" +#include "shared/source/command_container/implicit_scaling.h" +#include "shared/source/command_container/implicit_scaling_xehp_plus.inl" +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/xe_hp_core/hw_cmds_base.h" + +namespace NEO { + +using Family = XeHpFamily; +} + +#include 
"shared/source/command_container/image_surface_state/compression_params_tgllp_plus.inl" +#include "shared/source/command_container/image_surface_state/compression_params_xehp_plus.inl" + +namespace NEO { + +template <> +void EncodeDispatchKernel::adjustTimestampPacket(WALKER_TYPE &walkerCmd, const HardwareInfo &hwInfo) { +} + +template <> +inline void EncodeSurfaceState::encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo) { +} + +template <> +void EncodeDispatchKernel::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) { +} + +template <> +void EncodeDispatchKernel::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo) { + interfaceDescriptor.setBarrierEnable(value); +} + +template <> +void EncodeDispatchKernel::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) { + auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + + if (helper.getSteppingFromHwRevId(hwInfo) >= REVISION_B) { + interfaceDescriptor.setThreadGroupDispatchSize(3u); + } + + if (DebugManager.flags.ForceThreadGroupDispatchSize.get() != -1) { + interfaceDescriptor.setThreadGroupDispatchSize(DebugManager.flags.ForceThreadGroupDispatchSize.get()); + } +} + +template struct EncodeDispatchKernel; +template struct EncodeStates; +template struct EncodeMath; +template struct EncodeMathMMIO; +template struct EncodeIndirectParams; +template struct EncodeSetMMIO; +template struct EncodeMediaInterfaceDescriptorLoad; +template struct EncodeStateBaseAddress; +template struct EncodeStoreMMIO; +template struct EncodeSurfaceState; +template struct EncodeComputeMode; +template struct EncodeAtomic; +template struct EncodeSempahore; +template struct EncodeBatchBufferStartOrEnd; +template struct EncodeMiFlushDW; +template struct 
EncodeMemoryPrefetch; +template struct EncodeMiArbCheck; +template struct EncodeWA; +template struct ImplicitScalingDispatch; +} // namespace NEO diff --git a/shared/source/xe_hp_core/command_stream_receiver_hw_xe_hp_core.cpp b/shared/source/xe_hp_core/command_stream_receiver_hw_xe_hp_core.cpp new file mode 100644 index 0000000000..e4e0e06ea2 --- /dev/null +++ b/shared/source/xe_hp_core/command_stream_receiver_hw_xe_hp_core.cpp @@ -0,0 +1,151 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/xe_hp_core/hw_cmds.h" +#include "shared/source/xe_hp_core/hw_info.h" + +using Family = NEO::XeHpFamily; + +#include "shared/source/command_stream/command_stream_receiver_hw_tgllp_plus.inl" +#include "shared/source/command_stream/command_stream_receiver_hw_xehp_plus.inl" +#include "shared/source/helpers/blit_commands_helper_xehp_plus.inl" +#include "shared/source/helpers/populate_factory.h" + +namespace NEO { + +static auto gfxCore = IGFX_XE_HP_CORE; + +template <> +bool ImplicitFlushSettings::defaultSettingForNewResource = true; +template <> +bool ImplicitFlushSettings::defaultSettingForGpuIdle = true; +template class ImplicitFlushSettings; + +template <> +void populateFactoryTable>() { + extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE]; + commandStreamReceiverFactory[gfxCore] = DeviceCommandStreamReceiver::create; +} + +template <> +MemoryCompressionState CommandStreamReceiverHw::getMemoryCompressionState(bool auxTranslationRequired) const { + auto memoryCompressionState = MemoryCompressionState::NotApplicable; + if (DebugManager.flags.EnableStatelessCompression.get()) { + memoryCompressionState = auxTranslationRequired ? 
MemoryCompressionState::Disabled : MemoryCompressionState::Enabled; + } + return memoryCompressionState; +} + +template <> +GraphicsAllocation *CommandStreamReceiverHw::getClearColorAllocation() { + constexpr uint32_t clearColorSize = 16u; + static uint8_t clearColorBuffer[clearColorSize]; + if (DebugManager.flags.UseClearColorAllocationForBlitter.get()) { + if (clearColorAllocation == nullptr) { + auto lock = this->obtainUniqueOwnership(); + if (clearColorAllocation == nullptr) { + AllocationProperties properties{rootDeviceIndex, clearColorSize, GraphicsAllocation::AllocationType::BUFFER, osContext->getDeviceBitfield()}; + properties.flags.readOnlyMultiStorage = true; + properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = false; + clearColorAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties, clearColorBuffer); + } + } + } + return clearColorAllocation; +} + +template <> +void CommandStreamReceiverHw::programPerDssBackedBuffer(LinearStream &commandStream, Device &device, DispatchFlags &dispatchFlags) { +} + +template <> +size_t CommandStreamReceiverHw::getCmdSizeForPerDssBackedBuffer(const HardwareInfo &hwInfo) { + return 0; +} + +template <> +void BlitCommandsHelper::appendClearColor(const BlitProperties &blitProperties, typename Family::XY_COPY_BLT &blitCmd) { + using XY_COPY_BLT = typename Family::XY_COPY_BLT; + if (DebugManager.flags.UseClearColorAllocationForBlitter.get()) { + blitCmd.setSourceClearValueEnable(XY_COPY_BLT::CLEAR_VALUE_ENABLE::CLEAR_VALUE_ENABLE_ENABLE); + blitCmd.setDestinationClearValueEnable(XY_COPY_BLT::CLEAR_VALUE_ENABLE::CLEAR_VALUE_ENABLE_ENABLE); + auto clearColorAddress = blitProperties.clearColorAllocation->getGpuAddress(); + blitCmd.setSourceClearAddressLow(static_cast(clearColorAddress & 0xFFFFFFFFULL)); + blitCmd.setSourceClearAddressHigh(static_cast(clearColorAddress >> 32)); + blitCmd.setDestinationClearAddressLow(static_cast(clearColorAddress & 0xFFFFFFFFULL)); + 
blitCmd.setDestinationClearAddressHigh(static_cast(clearColorAddress >> 32)); + } +} + +template class CommandStreamReceiverHw; + +template <> +void BlitCommandsHelper::appendExtraMemoryProperties(typename Family::XY_COPY_BLT &blitCmd, const RootDeviceEnvironment &rootDeviceEnvironment) { + using XY_COPY_BLT = typename Family::XY_COPY_BLT; + + auto hwInfo = rootDeviceEnvironment.getHardwareInfo(); + auto &hwHelper = HwHelperHw::get(); + + if (hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_B, *hwInfo) && hwHelper.getLocalMemoryAccessMode(*hwInfo) == LocalMemoryAccessMode::CpuAccessAllowed) { + blitCmd.setSourceTargetMemory(XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); + blitCmd.setDestinationTargetMemory(XY_COPY_BLT::TARGET_MEMORY::TARGET_MEMORY_SYSTEM_MEM); + } +} + +template <> +void BlitCommandsHelper::appendExtraMemoryProperties(typename Family::XY_COLOR_BLT &blitCmd, const RootDeviceEnvironment &rootDeviceEnvironment) { + using XY_COLOR_BLT = typename Family::XY_COLOR_BLT; + + auto hwInfo = rootDeviceEnvironment.getHardwareInfo(); + auto &hwHelper = HwHelperHw::get(); + + if (hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_B, *hwInfo) && + hwHelper.getLocalMemoryAccessMode(*hwInfo) == LocalMemoryAccessMode::CpuAccessAllowed) { + blitCmd.setDestinationTargetMemory(XY_COLOR_BLT::DESTINATION_TARGET_MEMORY::DESTINATION_TARGET_MEMORY_SYSTEM_MEM); + } +} + +template struct BlitCommandsHelper; + +const Family::COMPUTE_WALKER Family::cmdInitGpgpuWalker = Family::COMPUTE_WALKER::sInit(); +const Family::CFE_STATE Family::cmdInitCfeState = Family::CFE_STATE::sInit(); +const Family::INTERFACE_DESCRIPTOR_DATA Family::cmdInitInterfaceDescriptorData = Family::INTERFACE_DESCRIPTOR_DATA::sInit(); +const Family::MI_BATCH_BUFFER_START Family::cmdInitBatchBufferStart = Family::MI_BATCH_BUFFER_START::sInit(); +const Family::MI_BATCH_BUFFER_END Family::cmdInitBatchBufferEnd = Family::MI_BATCH_BUFFER_END::sInit(); +const Family::PIPE_CONTROL 
Family::cmdInitPipeControl = Family::PIPE_CONTROL::sInit(); +const Family::STATE_COMPUTE_MODE Family::cmdInitStateComputeMode = Family::STATE_COMPUTE_MODE::sInit(); +const Family::_3DSTATE_BINDING_TABLE_POOL_ALLOC Family::cmdInitStateBindingTablePoolAlloc = + Family::_3DSTATE_BINDING_TABLE_POOL_ALLOC::sInit(); +const Family::MI_SEMAPHORE_WAIT Family::cmdInitMiSemaphoreWait = Family::MI_SEMAPHORE_WAIT::sInit(); +const Family::RENDER_SURFACE_STATE Family::cmdInitRenderSurfaceState = Family::RENDER_SURFACE_STATE::sInit(); +const Family::POSTSYNC_DATA Family::cmdInitPostSyncData = Family::POSTSYNC_DATA::sInit(); +const Family::MI_SET_PREDICATE Family::cmdInitSetPredicate = Family::MI_SET_PREDICATE::sInit(); +const Family::MI_LOAD_REGISTER_IMM Family::cmdInitLoadRegisterImm = Family::MI_LOAD_REGISTER_IMM::sInit(); +const Family::MI_LOAD_REGISTER_REG Family::cmdInitLoadRegisterReg = Family::MI_LOAD_REGISTER_REG::sInit(); +const Family::MI_LOAD_REGISTER_MEM Family::cmdInitLoadRegisterMem = Family::MI_LOAD_REGISTER_MEM::sInit(); +const Family::MI_STORE_DATA_IMM Family::cmdInitStoreDataImm = Family::MI_STORE_DATA_IMM::sInit(); +const Family::MI_STORE_REGISTER_MEM Family::cmdInitStoreRegisterMem = Family::MI_STORE_REGISTER_MEM::sInit(); +const Family::MI_NOOP Family::cmdInitNoop = Family::MI_NOOP::sInit(); +const Family::MI_REPORT_PERF_COUNT Family::cmdInitReportPerfCount = Family::MI_REPORT_PERF_COUNT::sInit(); +const Family::MI_ATOMIC Family::cmdInitAtomic = Family::MI_ATOMIC::sInit(); +const Family::PIPELINE_SELECT Family::cmdInitPipelineSelect = Family::PIPELINE_SELECT::sInit(); +const Family::MI_ARB_CHECK Family::cmdInitArbCheck = Family::MI_ARB_CHECK::sInit(); +const Family::STATE_BASE_ADDRESS Family::cmdInitStateBaseAddress = Family::STATE_BASE_ADDRESS::sInit(); +const Family::MEDIA_SURFACE_STATE Family::cmdInitMediaSurfaceState = Family::MEDIA_SURFACE_STATE::sInit(); +const Family::SAMPLER_STATE Family::cmdInitSamplerState = Family::SAMPLER_STATE::sInit(); +const 
Family::BINDING_TABLE_STATE Family::cmdInitBindingTableState = Family::BINDING_TABLE_STATE::sInit(); +const Family::MI_USER_INTERRUPT Family::cmdInitUserInterrupt = Family::MI_USER_INTERRUPT::sInit(); +const Family::MI_CONDITIONAL_BATCH_BUFFER_END Family::cmdInitConditionalBatchBufferEnd = Family::MI_CONDITIONAL_BATCH_BUFFER_END::sInit(); +const Family::L3_CONTROL Family::cmdInitL3Control = Family::L3_CONTROL::sInit(); +const Family::L3_FLUSH_ADDRESS_RANGE Family::cmdInitL3FlushAddressRange = Family::L3_FLUSH_ADDRESS_RANGE::sInit(); +const Family::MI_FLUSH_DW Family::cmdInitMiFlushDw = Family::MI_FLUSH_DW::sInit(); +const Family::XY_BLOCK_COPY_BLT Family::cmdInitXyCopyBlt = Family::XY_BLOCK_COPY_BLT::sInit(); +const Family::XY_FAST_COLOR_BLT Family::cmdInitXyColorBlt = Family::XY_FAST_COLOR_BLT::sInit(); +const Family::_3DSTATE_BTD Family::cmd3dStateBtd = Family::_3DSTATE_BTD::sInit(); +const Family::_3DSTATE_BTD_BODY Family::cmd3dStateBtdBody = Family::_3DSTATE_BTD_BODY::sInit(); +const Family::STATE_SIP Family::cmdInitStateSip = Family::STATE_SIP::sInit(); +} // namespace NEO diff --git a/shared/source/xe_hp_core/enable_family_full_core_xe_hp_core.cpp b/shared/source/xe_hp_core/enable_family_full_core_xe_hp_core.cpp new file mode 100644 index 0000000000..07fa1ccef2 --- /dev/null +++ b/shared/source/xe_hp_core/enable_family_full_core_xe_hp_core.cpp @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/xe_hp_core/hw_cmds.h" + +namespace NEO { + +extern HwHelper *hwHelperFactory[IGFX_MAX_CORE]; + +typedef XeHpFamily Family; +static auto gfxFamily = IGFX_XE_HP_CORE; + +struct EnableCoreXeHpCore { + EnableCoreXeHpCore() { + hwHelperFactory[gfxFamily] = &HwHelperHw::get(); + } +}; + +static EnableCoreXeHpCore enable; +} // namespace NEO diff --git a/shared/source/xe_hp_core/enable_hw_info_config_xe_hp_core.cpp
b/shared/source/xe_hp_core/enable_hw_info_config_xe_hp_core.cpp new file mode 100644 index 0000000000..c208ce86ce --- /dev/null +++ b/shared/source/xe_hp_core/enable_hw_info_config_xe_hp_core.cpp @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/os_interface/hw_info_config.h" +#include "shared/source/xe_hp_core/hw_cmds.h" + +namespace NEO { + +#ifdef SUPPORT_XEHP +static EnableProductHwInfoConfig enableXEHP; +#endif +} // namespace NEO diff --git a/shared/source/xe_hp_core/enable_xe_hp_core.cpp b/shared/source/xe_hp_core/enable_xe_hp_core.cpp new file mode 100644 index 0000000000..56b6ef1345 --- /dev/null +++ b/shared/source/xe_hp_core/enable_xe_hp_core.cpp @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/os_interface/hw_info_config.h" +#include "shared/source/xe_hp_core/hw_cmds.h" + +#include "opencl/source/helpers/enable_product.inl" + +namespace NEO { + +#ifdef SUPPORT_XEHP +static EnableGfxProductHw enableGfxProductHwXEHP; +#endif +} // namespace NEO diff --git a/shared/source/xe_hp_core/hw_cmds.h b/shared/source/xe_hp_core/hw_cmds.h new file mode 100644 index 0000000000..493ba828d0 --- /dev/null +++ b/shared/source/xe_hp_core/hw_cmds.h @@ -0,0 +1,11 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#ifdef SUPPORT_XEHP +#include "hw_cmds_xehp.h" +#endif diff --git a/shared/source/xe_hp_core/hw_cmds_base.h b/shared/source/xe_hp_core/hw_cmds_base.h new file mode 100644 index 0000000000..8280b3f735 --- /dev/null +++ b/shared/source/xe_hp_core/hw_cmds_base.h @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/commands/bxml_generator_glue.h" +#include "shared/source/helpers/debug_helpers.h" +#include "shared/source/xe_hp_core/hw_info.h" + 
+#include "igfxfmid.h" + +#include +#include + +template +struct CmdParse; +namespace NEO { + +struct XeHpCore { +#include "shared/source/generated/xe_hp_core/hw_cmds_generated_xe_hp_core.inl" + + static constexpr uint32_t stateComputeModeForceNonCoherentMask = (0b11u << 3); + static constexpr uint32_t stateComputeModeLargeGrfModeMask = (1u << 15); + + static constexpr bool isUsingL3Control = true; + + struct DataPortBindlessSurfaceExtendedMessageDescriptor { + union { + struct { + uint32_t bindlessSurfaceOffset : 25; + uint32_t reserved : 6; + }; + uint32_t packed; + }; + + DataPortBindlessSurfaceExtendedMessageDescriptor() { + packed = 0; + } + + void setBindlessSurfaceOffset(uint32_t offsetInBindlessSurfaceHeapInBytes) { + bindlessSurfaceOffset = offsetInBindlessSurfaceHeapInBytes >> 6; + } + + uint32_t getBindlessSurfaceOffsetToPatch() { + return bindlessSurfaceOffset << 6; + } + }; + + static_assert(sizeof(DataPortBindlessSurfaceExtendedMessageDescriptor) == sizeof(DataPortBindlessSurfaceExtendedMessageDescriptor::packed), ""); +}; + +struct XeHpFamily : public XeHpCore { + using PARSE = CmdParse; + using GfxFamily = XeHpFamily; + using WALKER_TYPE = COMPUTE_WALKER; + using VFE_STATE_TYPE = CFE_STATE; + using XY_COPY_BLT = typename GfxFamily::XY_BLOCK_COPY_BLT; + using XY_COLOR_BLT = typename GfxFamily::XY_FAST_COLOR_BLT; + using MI_STORE_REGISTER_MEM_CMD = typename GfxFamily::MI_STORE_REGISTER_MEM; + using TimestampPacketType = uint32_t; + static const COMPUTE_WALKER cmdInitGpgpuWalker; + static const CFE_STATE cmdInitCfeState; + static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData; + static const MI_BATCH_BUFFER_END cmdInitBatchBufferEnd; + static const MI_BATCH_BUFFER_START cmdInitBatchBufferStart; + static const PIPE_CONTROL cmdInitPipeControl; + static const STATE_COMPUTE_MODE cmdInitStateComputeMode; + static const _3DSTATE_BINDING_TABLE_POOL_ALLOC cmdInitStateBindingTablePoolAlloc; + static const MI_SEMAPHORE_WAIT 
cmdInitMiSemaphoreWait; + static const RENDER_SURFACE_STATE cmdInitRenderSurfaceState; + static const POSTSYNC_DATA cmdInitPostSyncData; + static const MI_SET_PREDICATE cmdInitSetPredicate; + static const MI_LOAD_REGISTER_IMM cmdInitLoadRegisterImm; + static const MI_LOAD_REGISTER_REG cmdInitLoadRegisterReg; + static const MI_LOAD_REGISTER_MEM cmdInitLoadRegisterMem; + static const MI_STORE_DATA_IMM cmdInitStoreDataImm; + static const MI_STORE_REGISTER_MEM cmdInitStoreRegisterMem; + static const MI_NOOP cmdInitNoop; + static const MI_REPORT_PERF_COUNT cmdInitReportPerfCount; + static const MI_ATOMIC cmdInitAtomic; + static const PIPELINE_SELECT cmdInitPipelineSelect; + static const MI_ARB_CHECK cmdInitArbCheck; + static const STATE_BASE_ADDRESS cmdInitStateBaseAddress; + static const MEDIA_SURFACE_STATE cmdInitMediaSurfaceState; + static const SAMPLER_STATE cmdInitSamplerState; + static const BINDING_TABLE_STATE cmdInitBindingTableState; + static const MI_USER_INTERRUPT cmdInitUserInterrupt; + static const MI_CONDITIONAL_BATCH_BUFFER_END cmdInitConditionalBatchBufferEnd; + static const L3_CONTROL cmdInitL3Control; + static const L3_FLUSH_ADDRESS_RANGE cmdInitL3FlushAddressRange; + static const MI_FLUSH_DW cmdInitMiFlushDw; + static const XY_BLOCK_COPY_BLT cmdInitXyCopyBlt; + static const XY_FAST_COLOR_BLT cmdInitXyColorBlt; + static const _3DSTATE_BTD cmd3dStateBtd; + static const _3DSTATE_BTD_BODY cmd3dStateBtdBody; + static const STATE_SIP cmdInitStateSip; + + static constexpr bool supportsCmdSet(GFXCORE_FAMILY cmdSetBaseFamily) { + return cmdSetBaseFamily == IGFX_XE_HP_CORE; + } +}; + +} // namespace NEO diff --git a/shared/source/xe_hp_core/hw_cmds_xehp.h b/shared/source/xe_hp_core/hw_cmds_xehp.h new file mode 100644 index 0000000000..48d48c1bb5 --- /dev/null +++ b/shared/source/xe_hp_core/hw_cmds_xehp.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include 
"shared/source/xe_hp_core/hw_cmds_base.h" +namespace NEO { + +struct XEHP : public XeHpFamily { + static const PLATFORM platform; + static const HardwareInfo hwInfo; + static const uint64_t defaultHardwareInfoConfig; + static FeatureTable featureTable; + static WorkaroundTable workaroundTable; + static const uint32_t threadsPerEu = 8; + static const uint32_t maxEuPerSubslice = 16; + static const uint32_t maxSlicesSupported = 8; + static const uint32_t maxSubslicesSupported = 32; + static const uint32_t maxDualSubslicesSupported = 32; + static const RuntimeCapabilityTable capabilityTable; + static void (*setupHardwareInfo)(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, uint64_t hwInfoConfig); + static void setupFeatureAndWorkaroundTable(HardwareInfo *hwInfo); +}; + +class XEHP_CONFIG : public XEHP { + public: + static void setupHardwareInfo(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable); + static void setupHardwareInfoMultiTile(HardwareInfo *hwInfo, bool setupFeatureTableAndWorkaroundTable, bool setupMultiTile); + static const HardwareInfo hwInfo; + + private: + static GT_SYSTEM_INFO gtSystemInfo; +}; +} // namespace NEO diff --git a/shared/source/xe_hp_core/hw_helper_xe_hp_core.cpp b/shared/source/xe_hp_core/hw_helper_xe_hp_core.cpp new file mode 100644 index 0000000000..02adc2bf17 --- /dev/null +++ b/shared/source/xe_hp_core/hw_helper_xe_hp_core.cpp @@ -0,0 +1,211 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/xe_hp_core/aub_mapper.h" +#include "shared/source/xe_hp_core/hw_cmds.h" + +using Family = NEO::XeHpFamily; + +#include "shared/source/helpers/constants.h" +#include "shared/source/helpers/extra_allocation_data_xehp_plus.inl" +#include "shared/source/helpers/flat_batch_buffer_helper_hw.inl" +#include "shared/source/helpers/hw_helper_base.inl" +#include "shared/source/helpers/hw_helper_tgllp_plus.inl" +#include 
"shared/source/helpers/hw_helper_xehp_plus.inl" + +namespace NEO { +template <> +const AuxTranslationMode HwHelperHw::defaultAuxTranslationMode = AuxTranslationMode::Blit; + +template <> +uint32_t HwHelperHw::getMetricsLibraryGenId() const { + return static_cast(MetricsLibraryApi::ClientGen::XE_HP); +} + +template <> +uint32_t HwHelperHw::getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const { + if (DebugManager.flags.OverrideNumComputeUnitsForScratch.get() != -1) { + return static_cast(DebugManager.flags.OverrideNumComputeUnitsForScratch.get()); + } + + // XeHP plus products return physical threads + return std::max(pHwInfo->gtSystemInfo.MaxSubSlicesSupported, static_cast(32)) * pHwInfo->gtSystemInfo.MaxEuPerSubSlice * (pHwInfo->gtSystemInfo.ThreadCount / pHwInfo->gtSystemInfo.EUCount); +} + +template <> +inline bool HwHelperHw::isSpecialWorkgroupSizeRequired(const HardwareInfo &hwInfo, bool isSimulation) const { + if (DebugManager.flags.ForceWorkgroupSize1x1x1.get() != -1) { + return static_cast(DebugManager.flags.ForceWorkgroupSize1x1x1.get()); + } else { + HwHelper &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + return (!isSimulation && hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_B, hwInfo) && hwHelper.getLocalMemoryAccessMode(hwInfo) == LocalMemoryAccessMode::CpuAccessAllowed); + } +} + +template <> +bool HwHelperHw::isPageTableManagerSupported(const HardwareInfo &hwInfo) const { + return false; +} + +template <> +bool HwHelperHw::isNewResidencyModelSupported() const { + return true; +} + +template <> +uint32_t HwHelperHw::getHwRevIdFromStepping(uint32_t stepping, const HardwareInfo &hwInfo) const { + if (hwInfo.platform.eProductFamily == PRODUCT_FAMILY::IGFX_XE_HP_SDV) { + switch (stepping) { + case REVISION_A0: + return 0x0; + case REVISION_A1: + return 0x1; + case REVISION_B: + return 0x4; + } + } + return CommonConstants::invalidStepping; +} + +template <> +uint32_t HwHelperHw::getSteppingFromHwRevId(const HardwareInfo 
&hwInfo) const { + if (hwInfo.platform.eProductFamily == PRODUCT_FAMILY::IGFX_XE_HP_SDV) { + switch (hwInfo.platform.usRevId) { + case 0x0: + return REVISION_A0; + case 0x1: + return REVISION_A1; + case 0x4: + return REVISION_B; + } + } + return CommonConstants::invalidStepping; +} + +template <> +uint32_t HwHelperHw::computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) { + using SHARED_LOCAL_MEMORY_SIZE = typename Family::INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE; + + auto slmValue = std::max(slmSize, 1024u); + slmValue = Math::nextPowerOfTwo(slmValue); + slmValue = Math::getMinLsbSet(slmValue); + slmValue = slmValue - 9; + DEBUG_BREAK_IF(slmValue > 7); + slmValue *= !!slmSize; + + return slmValue; +} + +template <> +void HwHelperHw::setL1CachePolicy(bool useL1Cache, typename Family::RENDER_SURFACE_STATE *surfaceState, const HardwareInfo *hwInfo) { +} + +template <> +inline bool HwHelperHw::allowRenderCompression(const HardwareInfo &hwInfo) const { + if (hwInfo.gtSystemInfo.EUCount == 256u) { + return false; + } + + return true; +} + +template <> +bool HwHelperHw::isBankOverrideRequired(const HardwareInfo &hwInfo) const { + + bool forceOverrideMemoryBankIndex = (HwHelper::getSubDevicesCount(&hwInfo) == 4 && + isWorkaroundRequired(REVISION_A0, REVISION_B, hwInfo)); + + if (DebugManager.flags.ForceMemoryBankIndexOverride.get() != -1) { + forceOverrideMemoryBankIndex = static_cast(DebugManager.flags.ForceMemoryBankIndexOverride.get()); + } + return forceOverrideMemoryBankIndex; +} + +template <> +bool HwHelperHw::isSipWANeeded(const HardwareInfo &hwInfo) const { + return hwInfo.platform.usRevId <= getHwRevIdFromStepping(REVISION_B, hwInfo); +} + +template <> +bool HwHelperHw::isBufferSizeSuitableForRenderCompression(const size_t size) const { + if (DebugManager.flags.EnableStatelessCompression.get()) { + return true; + } else { + return size > KB; + } +} + +template <> +LocalMemoryAccessMode HwHelperHw::getDefaultLocalMemoryAccessMode(const 
HardwareInfo &hwInfo) const { + if (isWorkaroundRequired(REVISION_A0, REVISION_B, hwInfo)) { + return LocalMemoryAccessMode::CpuAccessDisallowed; + } + return LocalMemoryAccessMode::Default; +} + +template <> +const StackVec HwHelperHw::getThreadsPerEUConfigs() const { + return {4, 8}; +} + +template <> +std::string HwHelperHw::getExtensions() const { + std::string extensions; + extensions += "cl_intel_dot_accumulate "; + extensions += "cl_intel_subgroup_local_block_io "; + + return extensions; +} + +template <> +void MemorySynchronizationCommands::addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) { + using PIPE_CONTROL = typename Family::PIPE_CONTROL; + + if (DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.get() == 1) { + if (hwInfo.featureTable.ftrLocalMemory) { + PIPE_CONTROL cmd = Family::cmdInitPipeControl; + cmd.setCommandStreamerStallEnable(true); + cmd.setHdcPipelineFlush(true); + + auto pipeControl = static_cast(commandStream.getSpace(sizeof(PIPE_CONTROL))); + *pipeControl = cmd; + } + } +} + +template <> +void MemorySynchronizationCommands::setPipeControlExtraProperties(PIPE_CONTROL &pipeControl, PipeControlArgs &args) { + pipeControl.setHdcPipelineFlush(args.hdcPipelineFlush); + pipeControl.setCompressionControlSurfaceCcsFlush(args.compressionControlSurfaceCcsFlush); + + if (DebugManager.flags.FlushAllCaches.get()) { + pipeControl.setHdcPipelineFlush(true); + pipeControl.setCompressionControlSurfaceCcsFlush(true); + } + if (DebugManager.flags.DoNotFlushCaches.get()) { + pipeControl.setHdcPipelineFlush(false); + pipeControl.setCompressionControlSurfaceCcsFlush(false); + } +} + +template <> +void MemorySynchronizationCommands::setPostSyncExtraProperties(PipeControlArgs &args, const HardwareInfo &hwInfo) { + if (hwInfo.featureTable.ftrLocalMemory) { + args.hdcPipelineFlush = true; + } +} + +template <> +void MemorySynchronizationCommands::setCacheFlushExtraProperties(PipeControlArgs &args) { + 
args.hdcPipelineFlush = true; +} + +template class HwHelperHw; +template class FlatBatchBufferHelperHw; +template struct MemorySynchronizationCommands; +template struct LriHelper; +} // namespace NEO diff --git a/shared/source/xe_hp_core/hw_info.h b/shared/source/xe_hp_core/hw_info.h new file mode 100644 index 0000000000..74df4b951e --- /dev/null +++ b/shared/source/xe_hp_core/hw_info.h @@ -0,0 +1,11 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#ifdef SUPPORT_XEHP +#include "hw_info_xehp.h" +#endif diff --git a/shared/source/xe_hp_core/hw_info_xe_hp_core.h b/shared/source/xe_hp_core/hw_info_xe_hp_core.h new file mode 100644 index 0000000000..abfbadc1f3 --- /dev/null +++ b/shared/source/xe_hp_core/hw_info_xe_hp_core.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/helpers/hw_info.h" + +namespace NEO { + +struct XeHpFamily; + +template <> +struct GfxFamilyMapper { + typedef XeHpFamily GfxFamily; + static const char *name; +}; +} // namespace NEO diff --git a/shared/source/xe_hp_core/hw_info_xehp.h b/shared/source/xe_hp_core/hw_info_xehp.h new file mode 100644 index 0000000000..3bc8437b3e --- /dev/null +++ b/shared/source/xe_hp_core/hw_info_xehp.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "hw_info_xe_hp_core.h" + +namespace NEO { + +struct XEHP; + +template <> +struct HwMapper { + enum { gfxFamily = IGFX_XE_HP_CORE }; + + static const char *abbreviation; + typedef GfxFamilyMapper(gfxFamily)>::GfxFamily GfxFamily; + typedef XEHP GfxProduct; +}; +} // namespace NEO diff --git a/shared/source/xe_hp_core/image_core_xe_hp_core.cpp b/shared/source/xe_hp_core/image_core_xe_hp_core.cpp new file mode 100644 index 0000000000..78edb02f54 --- /dev/null +++ b/shared/source/xe_hp_core/image_core_xe_hp_core.cpp @@ -0,0 +1,20 @@ 
+/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/image/image_surface_state.h" +#include "shared/source/xe_hp_core/hw_cmds_base.h" + +namespace NEO { + +using Family = XeHpFamily; + +template <> +void setFilterMode(Family::RENDER_SURFACE_STATE *surfaceState, const HardwareInfo *hwInfo) {} +// clang-format off +#include "shared/source/image/image_skl_plus.inl" +// clang-format on +} // namespace NEO diff --git a/shared/source/xe_hp_core/linux/direct_submission_xe_hp_core.cpp b/shared/source/xe_hp_core/linux/direct_submission_xe_hp_core.cpp new file mode 100644 index 0000000000..1d778cef79 --- /dev/null +++ b/shared/source/xe_hp_core/linux/direct_submission_xe_hp_core.cpp @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/direct_submission/direct_submission_hw.h" +#include "shared/source/direct_submission/dispatchers/blitter_dispatcher.inl" +#include "shared/source/direct_submission/dispatchers/dispatcher.inl" +#include "shared/source/direct_submission/dispatchers/render_dispatcher.inl" +#include "shared/source/direct_submission/linux/drm_direct_submission.inl" + +#include "hw_cmds.h" + +namespace NEO { +using GfxFamily = XeHpFamily; + +template class DrmDirectSubmission>; +template class DrmDirectSubmission>; +} // namespace NEO diff --git a/shared/source/xe_hp_core/linux/hw_info_config_xe_hp_core.cpp b/shared/source/xe_hp_core/linux/hw_info_config_xe_hp_core.cpp new file mode 100644 index 0000000000..65965f5a72 --- /dev/null +++ b/shared/source/xe_hp_core/linux/hw_info_config_xe_hp_core.cpp @@ -0,0 +1,13 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/os_interface/hw_info_config.inl" +#include "shared/source/os_interface/hw_info_config_xehp_plus.inl" + +#ifdef SUPPORT_XEHP +#include "hw_info_config_xehp.inl" +#endif diff --git 
a/shared/source/xe_hp_core/linux/hw_info_config_xehp.inl b/shared/source/xe_hp_core/linux/hw_info_config_xehp.inl new file mode 100644 index 0000000000..bc35d095b5 --- /dev/null +++ b/shared/source/xe_hp_core/linux/hw_info_config_xehp.inl @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/helpers/hw_info.h" +#include "shared/source/kernel/kernel_properties.h" +#include "shared/source/os_interface/hw_info_config.h" + +namespace NEO { +template <> +int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { + auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); + + if (hwHelper.allowRenderCompression(*hwInfo)) { + enableRenderCompression(hwInfo); + } + + hwInfo->featureTable.ftrRcsNode = false; + if (DebugManager.flags.NodeOrdinal.get() == static_cast(aub_stream::EngineType::ENGINE_RCS)) { + hwInfo->featureTable.ftrRcsNode = true; + } + + enableBlitterOperationsSupport(hwInfo); + + return 0; +} + +template <> +bool HwInfoConfigHw::getHostMemCapabilitiesSupported(const HardwareInfo *hwInfo) { + HwHelper &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); + if (hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_B, *hwInfo) && (hwHelper.getLocalMemoryAccessMode(*hwInfo) == LocalMemoryAccessMode::CpuAccessAllowed)) { + return false; + } + + return true; +} + +template <> +uint64_t HwInfoConfigHw::getHostMemCapabilitiesValue() { + return UNIFIED_SHARED_MEMORY_ACCESS; +} + +template <> +void HwInfoConfigHw::getKernelExtendedProperties(uint32_t *fp16, uint32_t *fp32, uint32_t *fp64) { + *fp16 = 0u; + *fp32 = FP_ATOMIC_EXT_FLAG_GLOBAL_ADD; + *fp64 = 0u; +} + +template <> +uint32_t HwInfoConfigHw::getDeviceMemoryMaxClkRate(const HardwareInfo *hwInfo) { + return 2800u; +} + +template class HwInfoConfigHw; +} // namespace NEO diff --git a/shared/source/xe_hp_core/preamble_xe_hp_core.cpp 
b/shared/source/xe_hp_core/preamble_xe_hp_core.cpp new file mode 100644 index 0000000000..d6657ca775 --- /dev/null +++ b/shared/source/xe_hp_core/preamble_xe_hp_core.cpp @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "hw_cmds.h" + +namespace NEO { +struct XeHpFamily; +using Family = XeHpFamily; +} // namespace NEO + +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/helpers/preamble_xehp_plus.inl" +namespace NEO { + +template <> +void PreambleHelper::appendProgramVFEState(const HardwareInfo &hwInfo, const StreamProperties &streamProperties, uint32_t additionalKernelExecInfo, void *cmd) { + auto command = static_cast(cmd); + auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + + if (helper.getSteppingFromHwRevId(hwInfo) >= REVISION_B) { + command->setComputeOverdispatchDisable(true); + } + + if (DebugManager.flags.CFEComputeOverdispatchDisable.get() != -1) { + command->setComputeOverdispatchDisable(DebugManager.flags.CFEComputeOverdispatchDisable.get()); + } + + if (DebugManager.flags.CFEWeightedDispatchModeDisable.get() != -1) { + command->setWeightedDispatchModeDisable(DebugManager.flags.CFEWeightedDispatchModeDisable.get()); + } + + if (DebugManager.flags.CFESingleSliceDispatchCCSMode.get() != -1) { + command->setSingleSliceDispatchCcsMode(DebugManager.flags.CFESingleSliceDispatchCCSMode.get()); + } +} + +template struct PreambleHelper; + +} // namespace NEO diff --git a/shared/source/xe_hp_core/preemption_xe_hp_core.cpp b/shared/source/xe_hp_core/preemption_xe_hp_core.cpp new file mode 100644 index 0000000000..790858f916 --- /dev/null +++ b/shared/source/xe_hp_core/preemption_xe_hp_core.cpp @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_stream/preemption.h" +#include "shared/source/command_stream/preemption.inl" + +namespace NEO { + +using GfxFamily = XeHpFamily; 
+ +#include "shared/source/command_stream/preemption_xehp_plus.inl" + +template void PreemptionHelper::programCmdStream(LinearStream &cmdStream, PreemptionMode newPreemptionMode, + PreemptionMode oldPreemptionMode, GraphicsAllocation *preemptionCsr); +template size_t PreemptionHelper::getRequiredCmdStreamSize(PreemptionMode newPreemptionMode, PreemptionMode oldPreemptionMode); +template size_t PreemptionHelper::getPreemptionWaCsSize(const Device &device); +template void PreemptionHelper::applyPreemptionWaCmdsBegin(LinearStream *pCommandStream, const Device &device); +template void PreemptionHelper::applyPreemptionWaCmdsEnd(LinearStream *pCommandStream, const Device &device); +template void PreemptionHelper::programInterfaceDescriptorDataPreemption(INTERFACE_DESCRIPTOR_DATA *idd, PreemptionMode preemptionMode); +} // namespace NEO diff --git a/shared/source/xe_hp_core/state_base_address_xe_hp_core.cpp b/shared/source/xe_hp_core/state_base_address_xe_hp_core.cpp new file mode 100644 index 0000000000..c8cfcf2536 --- /dev/null +++ b/shared/source/xe_hp_core/state_base_address_xe_hp_core.cpp @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/state_base_address_xehp_plus.inl" + +namespace NEO { + +template <> +void StateBaseAddressHelper::appendExtraCacheSettings(STATE_BASE_ADDRESS *stateBaseAddress, GmmHelper *gmmHelper) { + if (DebugManager.flags.ForceStatelessL1CachingPolicy.get() != -1) { + stateBaseAddress->setL1CachePolicyL1CacheControl(static_cast(DebugManager.flags.ForceStatelessL1CachingPolicy.get())); + } +} + +template struct StateBaseAddressHelper; +} // namespace NEO diff --git a/shared/source/xe_hp_core/windows/direct_submission_xe_hp_core.cpp b/shared/source/xe_hp_core/windows/direct_submission_xe_hp_core.cpp new file mode 100644 index 0000000000..392cc48b99 --- /dev/null +++ b/shared/source/xe_hp_core/windows/direct_submission_xe_hp_core.cpp @@ -0,0 +1,21 @@ +/* + 
* Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/direct_submission/direct_submission_hw.h" +#include "shared/source/direct_submission/dispatchers/blitter_dispatcher.inl" +#include "shared/source/direct_submission/dispatchers/dispatcher.inl" +#include "shared/source/direct_submission/dispatchers/render_dispatcher.inl" +#include "shared/source/direct_submission/windows/wddm_direct_submission.inl" + +#include "hw_cmds.h" + +namespace NEO { +using GfxFamily = XeHpFamily; + +template class WddmDirectSubmission>; +template class WddmDirectSubmission>; +} // namespace NEO diff --git a/shared/source/xe_hp_core/windows/hw_info_config_xe_hp_core.cpp b/shared/source/xe_hp_core/windows/hw_info_config_xe_hp_core.cpp new file mode 100644 index 0000000000..467d05c8ca --- /dev/null +++ b/shared/source/xe_hp_core/windows/hw_info_config_xe_hp_core.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/helpers/hw_info.h" +#include "shared/source/kernel/kernel_properties.h" +#include "shared/source/os_interface/hw_info_config.h" +#include "shared/source/os_interface/hw_info_config.inl" +#include "shared/source/os_interface/hw_info_config_xehp_plus.inl" + +namespace NEO { + +#ifdef SUPPORT_XEHP +template <> +int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { + auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); + + if (hwHelper.allowRenderCompression(*hwInfo)) { + enableRenderCompression(hwInfo); + } + + hwInfo->featureTable.ftrRcsNode = false; + if (DebugManager.flags.NodeOrdinal.get() == static_cast(aub_stream::EngineType::ENGINE_RCS)) { + hwInfo->featureTable.ftrRcsNode = true; + } + + enableBlitterOperationsSupport(hwInfo); + return 0; +} + +template <> +bool 
HwInfoConfigHw::getHostMemCapabilitiesSupported(const HardwareInfo *hwInfo) { + HwHelper &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); + if (hwHelper.isWorkaroundRequired(REVISION_A0, REVISION_B, *hwInfo) && (hwHelper.getLocalMemoryAccessMode(*hwInfo) == LocalMemoryAccessMode::CpuAccessAllowed)) { + return false; + } + + return true; +} + +template <> +void HwInfoConfigHw::getKernelExtendedProperties(uint32_t *fp16, uint32_t *fp32, uint32_t *fp64) { + *fp16 = 0u; + *fp32 = FP_ATOMIC_EXT_FLAG_GLOBAL_ADD; + *fp64 = 0u; +} + +template <> +uint32_t HwInfoConfigHw::getDeviceMemoryMaxClkRate(const HardwareInfo *hwInfo) { + return 2800u; +} + +template class HwInfoConfigHw; +#endif +} // namespace NEO diff --git a/shared/test/unit_test/kernel/kernel_descriptor_tests.cpp b/shared/test/unit_test/kernel/kernel_descriptor_tests.cpp index 39de9efed4..2987b61fd5 100644 --- a/shared/test/unit_test/kernel/kernel_descriptor_tests.cpp +++ b/shared/test/unit_test/kernel/kernel_descriptor_tests.cpp @@ -108,4 +108,9 @@ TEST(KernelDescriptorAttributesSupportsBuffersBiggerThan4Gb, GivenStatefulBuffer EXPECT_FALSE(desc.kernelAttributes.supportsBuffersBiggerThan4Gb()); desc.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless; EXPECT_FALSE(desc.kernelAttributes.supportsBuffersBiggerThan4Gb()); +} + +TEST(KernelDescriptorTest, givenExtendedInfoWhenAskingForSpecialPipelineSelectModeThenReturnFalse) { + NEO::ExtendedInfoBase extendedInfo; + EXPECT_FALSE(extendedInfo.specialPipelineSelectModeRequired()); } \ No newline at end of file diff --git a/shared/test/unit_test/os_interface/linux/CMakeLists.txt b/shared/test/unit_test/os_interface/linux/CMakeLists.txt index 1bf9975bcb..866ebf07dc 100644 --- a/shared/test/unit_test/os_interface/linux/CMakeLists.txt +++ b/shared/test/unit_test/os_interface/linux/CMakeLists.txt @@ -6,6 +6,7 @@ set(NEO_CORE_OS_INTERFACE_TESTS_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + 
${CMAKE_CURRENT_SOURCE_DIR}/app_resource_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/drm_query_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_special_heap_test.cpp ) diff --git a/shared/test/unit_test/os_interface/linux/app_resource_tests.cpp b/shared/test/unit_test/os_interface/linux/app_resource_tests.cpp new file mode 100644 index 0000000000..778a948b86 --- /dev/null +++ b/shared/test/unit_test/os_interface/linux/app_resource_tests.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/app_resource_helper.h" +#include "shared/test/common/mocks/mock_graphics_allocation.h" + +#include "opencl/test/unit_test/mocks/mock_memory_manager.h" +#include "test.h" + +using namespace NEO; + +TEST(AppResourceLinuxTests, givenGraphicsAllocationTypeWhenCreatingStorageInfoFromPropertiesThenResourceTagAlwaysEmpty) { + MockMemoryManager mockMemoryManager; + const DeviceBitfield singleTileMask{static_cast(1u << 2)}; + + auto allocationType = GraphicsAllocation::AllocationType::BUFFER; + AllocationProperties properties{mockRootDeviceIndex, false, 1u, allocationType, false, singleTileMask}; + + auto tag = AppResourceHelper::getResourceTagStr(properties.allocationType); + EXPECT_STREQ("", tag); + + auto storageInfo = mockMemoryManager.createStorageInfoFromProperties(properties); + EXPECT_STREQ(tag, storageInfo.resourceTag); +} diff --git a/shared/test/unit_test/os_interface/windows/CMakeLists.txt b/shared/test/unit_test/os_interface/windows/CMakeLists.txt index 6ae3666660..0f6f3e95b5 100644 --- a/shared/test/unit_test/os_interface/windows/CMakeLists.txt +++ b/shared/test/unit_test/os_interface/windows/CMakeLists.txt @@ -9,6 +9,7 @@ set(NEO_CORE_OS_INTERFACE_TESTS_WINDOWS ${CMAKE_CURRENT_SOURCE_DIR}/adapter_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter_info_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/gdi_interface_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/gmm_app_resource_tests.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/gmm_helper_tests_win.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_gdi_interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_gdi_interface.h diff --git a/shared/test/unit_test/os_interface/windows/gmm_app_resource_tests.cpp b/shared/test/unit_test/os_interface/windows/gmm_app_resource_tests.cpp new file mode 100644 index 0000000000..ba1f4b568a --- /dev/null +++ b/shared/test/unit_test/os_interface/windows/gmm_app_resource_tests.cpp @@ -0,0 +1,158 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/execution_environment/execution_environment.h" +#include "shared/source/execution_environment/root_device_environment.h" +#include "shared/source/gmm_helper/gmm.h" +#include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/source/helpers/app_resource_defines.h" +#include "shared/source/helpers/app_resource_helper.h" +#include "shared/source/helpers/hw_info.h" +#include "shared/source/helpers/string.h" +#include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/mocks/mock_graphics_allocation.h" + +#include "opencl/test/unit_test/fixtures/mock_execution_environment_gmm_fixture.h" +#include "opencl/test/unit_test/mocks/mock_memory_manager.h" +#include "test.h" + +using MockExecutionEnvironmentGmmTest = Test; + +using namespace NEO; + +struct GmmAppResourceWinTests : public MockExecutionEnvironmentGmmTest { + void SetUp() override { + MockExecutionEnvironmentGmmFixture::SetUp(); + rootDeviceEnvironment = executionEnvironment->rootDeviceEnvironments[0].get(); + localPlatformDevice = rootDeviceEnvironment->getMutableHardwareInfo(); + } + + template + static auto getResourceTagGMM(TGMM_RESCREATE_PARAMS &src) { + if constexpr (AppResourceDefines::has_ResourceTag>) { + return &src.ResourceTag; + } else { + return nullptr; + } + } + + RootDeviceEnvironment *rootDeviceEnvironment = nullptr; + HardwareInfo *localPlatformDevice = nullptr; + const 
DeviceBitfield singleTileMask{static_cast(1u << 2)}; +}; + +TEST_F(GmmAppResourceWinTests, givenIncorrectGraphicsAllocationTypeWhenGettingResourceTagThenNOTFOUNDIsReturned) { + auto tag = AppResourceHelper::getResourceTagStr(static_cast(999)); + EXPECT_STREQ(tag, "NOTFOUND"); +} + +TEST_F(GmmAppResourceWinTests, givenGraphicsAllocationTypeWhenGettingResourceTagThenForEveryDefinedTypeProperTagExist) { + auto firstTypeIdx = static_cast(GraphicsAllocation::AllocationType::UNKNOWN); + auto lastTypeIdx = static_cast(GraphicsAllocation::AllocationType::COUNT); + + for (int typeIdx = firstTypeIdx; typeIdx < lastTypeIdx; typeIdx++) { + auto allocationType = static_cast(typeIdx); + auto tag = AppResourceHelper::getResourceTagStr(allocationType); + + EXPECT_LE(strlen(tag), AppResourceDefines::maxStrLen); + EXPECT_STRNE(tag, "NOTFOUND"); + } +} + +TEST_F(GmmAppResourceWinTests, givenStorageInfoCreatedFromPropertiesWhenEnableResourceTagsThenGmmResourceTagIsSet) { + if (!AppResourceDefines::resourceTagSupport) { + GTEST_SKIP(); + } + DebugManagerStateRestore restorer; + MockMemoryManager mockMemoryManager(*executionEnvironment); + + auto firstTypeIdx = static_cast(GraphicsAllocation::AllocationType::UNKNOWN); + auto lastTypeIdx = static_cast(GraphicsAllocation::AllocationType::COUNT); + DebugManager.flags.EnableResourceTags.set(true); + + for (int typeIdx = firstTypeIdx; typeIdx != lastTypeIdx; typeIdx++) { + auto allocationType = static_cast(typeIdx); + + AllocationProperties properties{mockRootDeviceIndex, false, 1u, allocationType, false, singleTileMask}; + auto storageInfo = mockMemoryManager.createStorageInfoFromProperties(properties); + auto expectedSize = (AppResourceDefines::maxStrLen + 1) * sizeof(char); + + EXPECT_EQ(expectedSize, sizeof(storageInfo.resourceTag)); + auto tag = AppResourceHelper::getResourceTagStr(properties.allocationType); + EXPECT_STREQ(storageInfo.resourceTag, tag); + } +} + +struct AllocationTypeTagTestCase { + GraphicsAllocation::AllocationType 
type; + const char *str; +}; + +AllocationTypeTagTestCase allocationTypeTagValues[static_cast(GraphicsAllocation::AllocationType::COUNT)] = { + {GraphicsAllocation::AllocationType::BUFFER, "BUFFER"}, + {GraphicsAllocation::AllocationType::BUFFER_COMPRESSED, "BFCMPRSD"}, + {GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, "BFHSTMEM"}, + {GraphicsAllocation::AllocationType::COMMAND_BUFFER, "CMNDBUFF"}, + {GraphicsAllocation::AllocationType::CONSTANT_SURFACE, "CSNTSRFC"}, + {GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, "DEVQUEBF"}, + {GraphicsAllocation::AllocationType::EXTERNAL_HOST_PTR, "EXHSTPTR"}, + {GraphicsAllocation::AllocationType::FILL_PATTERN, "FILPATRN"}, + {GraphicsAllocation::AllocationType::GLOBAL_SURFACE, "GLBLSRFC"}, + {GraphicsAllocation::AllocationType::IMAGE, "IMAGE"}, + {GraphicsAllocation::AllocationType::INDIRECT_OBJECT_HEAP, "INOBHEAP"}, + {GraphicsAllocation::AllocationType::INSTRUCTION_HEAP, "INSTHEAP"}, + {GraphicsAllocation::AllocationType::INTERNAL_HEAP, "INTLHEAP"}, + {GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, "INHSTMEM"}, + {GraphicsAllocation::AllocationType::KERNEL_ISA, "KERNLISA"}, + {GraphicsAllocation::AllocationType::KERNEL_ISA_INTERNAL, "KRLISAIN"}, + {GraphicsAllocation::AllocationType::LINEAR_STREAM, "LINRSTRM"}, + {GraphicsAllocation::AllocationType::MAP_ALLOCATION, "MAPALLOC"}, + {GraphicsAllocation::AllocationType::MCS, "MCS"}, + {GraphicsAllocation::AllocationType::PIPE, "PIPE"}, + {GraphicsAllocation::AllocationType::PREEMPTION, "PRMPTION"}, + {GraphicsAllocation::AllocationType::PRINTF_SURFACE, "PRNTSRFC"}, + {GraphicsAllocation::AllocationType::PRIVATE_SURFACE, "PRVTSRFC"}, + {GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER, "PROFTGBF"}, + {GraphicsAllocation::AllocationType::SCRATCH_SURFACE, "SCRHSRFC"}, + {GraphicsAllocation::AllocationType::WORK_PARTITION_SURFACE, "WRPRTSRF"}, + {GraphicsAllocation::AllocationType::SHARED_BUFFER, "SHRDBUFF"}, + 
{GraphicsAllocation::AllocationType::SHARED_CONTEXT_IMAGE, "SRDCXIMG"}, + {GraphicsAllocation::AllocationType::SHARED_IMAGE, "SHERDIMG"}, + {GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY, "SRDRSCCP"}, + {GraphicsAllocation::AllocationType::SURFACE_STATE_HEAP, "SRFCSTHP"}, + {GraphicsAllocation::AllocationType::SVM_CPU, "SVM_CPU"}, + {GraphicsAllocation::AllocationType::SVM_GPU, "SVM_GPU"}, + {GraphicsAllocation::AllocationType::SVM_ZERO_COPY, "SVM0COPY"}, + {GraphicsAllocation::AllocationType::TAG_BUFFER, "TAGBUFER"}, + {GraphicsAllocation::AllocationType::GLOBAL_FENCE, "GLBLFENC"}, + {GraphicsAllocation::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, "TSPKTGBF"}, + {GraphicsAllocation::AllocationType::UNKNOWN, "UNKNOWN"}, + {GraphicsAllocation::AllocationType::WRITE_COMBINED, "WRTCMBND"}, + {GraphicsAllocation::AllocationType::RING_BUFFER, "RINGBUFF"}, + {GraphicsAllocation::AllocationType::SEMAPHORE_BUFFER, "SMPHRBUF"}, + {GraphicsAllocation::AllocationType::DEBUG_CONTEXT_SAVE_AREA, "DBCXSVAR"}, + {GraphicsAllocation::AllocationType::DEBUG_SBA_TRACKING_BUFFER, "DBSBATRB"}, + {GraphicsAllocation::AllocationType::DEBUG_MODULE_AREA, "DBMDLARE"}, + {GraphicsAllocation::AllocationType::UNIFIED_SHARED_MEMORY, "USHRDMEM"}, + {GraphicsAllocation::AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER, "GPUTSDBF"}}; +class AllocationTypeTagString : public ::testing::TestWithParam {}; + +TEST_P(AllocationTypeTagString, givenGraphicsAllocationTypeWhenCopyTagToStorageInfoThenCorrectTagIsReturned) { + if (!AppResourceDefines::resourceTagSupport) { + GTEST_SKIP(); + } + DebugManagerStateRestore restorer; + StorageInfo storageInfo = {}; + auto input = GetParam(); + + DebugManager.flags.EnableResourceTags.set(true); + AppResourceHelper::copyResourceTagStr(storageInfo.resourceTag, input.type, + sizeof(storageInfo.resourceTag)); + EXPECT_STREQ(storageInfo.resourceTag, input.str); +} + +INSTANTIATE_TEST_CASE_P(AllAllocationTypesTag, AllocationTypeTagString, 
::testing::ValuesIn(allocationTypeTagValues)); diff --git a/third_party/aub_stream/headers/engine_node.h b/third_party/aub_stream/headers/engine_node.h index 523711c07f..7eb9fb9677 100644 --- a/third_party/aub_stream/headers/engine_node.h +++ b/third_party/aub_stream/headers/engine_node.h @@ -16,6 +16,9 @@ enum EngineType : uint32_t { ENGINE_VCS, ENGINE_VECS, ENGINE_CCS, + ENGINE_CCS1, + ENGINE_CCS2, + ENGINE_CCS3, NUM_ENGINES }; diff --git a/third_party/metrics_library/metrics_library_api_1_0.h b/third_party/metrics_library/metrics_library_api_1_0.h index dd742989d3..20d0dafd30 100644 --- a/third_party/metrics_library/metrics_library_api_1_0.h +++ b/third_party/metrics_library/metrics_library_api_1_0.h @@ -84,6 +84,7 @@ enum class ClientGen : uint32_t Gen11, Gen11LP, Gen12, + XE_HP, // ... Last };