From ce8284badee412cb6bbd435275e4a438e2ae4452 Mon Sep 17 00:00:00 2001 From: Filip Hazubski Date: Fri, 7 Sep 2018 17:04:18 +0200 Subject: [PATCH] Move dispatchWalker() to separate class Change-Id: Idc95df3d67775022f2197c1f973182acb3558a2d Signed-off-by: Filip Hazubski --- CMakeLists.txt | 1 + runtime/command_queue/CMakeLists.txt | 2 + runtime/command_queue/gpgpu_walker.h | 40 +- runtime/command_queue/gpgpu_walker.inl | 320 +--------------- .../hardware_interface/hardware_interface.h | 144 +++++++ .../hardware_interface/hardware_interface.inl | 350 ++++++++++++++++++ runtime/gen10/hw_cmds.h | 6 + runtime/gen8/hw_cmds_base.h | 5 + runtime/gen9/hw_cmds_base.h | 4 + unit_tests/libult/mock_gfx_family.h | 4 + 10 files changed, 535 insertions(+), 341 deletions(-) create mode 100644 runtime/command_queue/hardware_interface/hardware_interface.h create mode 100644 runtime/command_queue/hardware_interface/hardware_interface.inl diff --git a/CMakeLists.txt b/CMakeLists.txt index c61f788f83..38c88f3516 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -539,6 +539,7 @@ include_directories(${IGDRCL_SOURCE_DIR}/runtime/gen_common/reg_configs${BRANCH_ include_directories(${IGDRCL_SOURCE_DIR}/runtime/gmm_helper/${BRANCH_DIR_SUFFIX}) include_directories(${IGDRCL_SOURCE_DIR}/runtime/gmm_helper/client_context${BRANCH_DIR_SUFFIX}) include_directories(${IGDRCL_SOURCE_DIR}/runtime/gmm_helper/gmm_memory${BRANCH_DIR_SUFFIX}) +include_directories(${IGDRCL_SOURCE_DIR}/runtime/command_queue/hardware_interface${BRANCH_DIR_SUFFIX}) set(HW_SRC_INCLUDE_PATH ${IGDRCL_SOURCE_DIR}/runtime/gen_common) diff --git a/runtime/command_queue/CMakeLists.txt b/runtime/command_queue/CMakeLists.txt index 4d1d481d4d..3971d40e1e 100644 --- a/runtime/command_queue/CMakeLists.txt +++ b/runtime/command_queue/CMakeLists.txt @@ -54,6 +54,8 @@ set(RUNTIME_SRCS_COMMAND_QUEUE ${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen_avx2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen_sse4.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/local_work_size.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface${BRANCH_DIR_SUFFIX}/hardware_interface.h + ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface${BRANCH_DIR_SUFFIX}/hardware_interface.inl ) target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_COMMAND_QUEUE}) set_property(GLOBAL PROPERTY RUNTIME_SRCS_COMMAND_QUEUE ${RUNTIME_SRCS_COMMAND_QUEUE}) diff --git a/runtime/command_queue/gpgpu_walker.h b/runtime/command_queue/gpgpu_walker.h index c712c2c73f..7358568c6d 100644 --- a/runtime/command_queue/gpgpu_walker.h +++ b/runtime/command_queue/gpgpu_walker.h @@ -46,6 +46,9 @@ using WALKER_HANDLE = void *; template using WALKER_TYPE = typename GfxFamily::WALKER_TYPE; +template +using HARDWARE_INTERFACE = typename GfxFamily::HARDWARE_INTERFACE; + constexpr int32_t NUM_ALU_INST_FOR_READ_MODIFY_WRITE = 4; constexpr int32_t L3SQC_BIT_LQSC_RO_PERF_DIS = 0x08000000; @@ -218,43 +221,6 @@ class GpgpuWalkerHelper { TimestampPacket *timestampPacket, TimestampPacket::WriteOperationType writeOperationType); - static void getDefaultDshSpace( - const size_t &offsetInterfaceDescriptorTable, - CommandQueue &commandQueue, - const MultiDispatchInfo &multiDispatchInfo, - size_t &totalInterfaceDescriptorTableSize, - OCLRT::Kernel *parentKernelDispatched, - OCLRT::IndirectHeap *dsh, - OCLRT::LinearStream *commandStream); - - static INTERFACE_DESCRIPTOR_DATA *obtainInterfaceDescriptorData( - WALKER_HANDLE pCmdData); - - static void setOffsetCrossThreadData( - WALKER_HANDLE pCmdData, - size_t &offsetCrossThreadData, - uint32_t &interfaceDescriptorIndex); - - static void dispatchWorkarounds( - OCLRT::LinearStream *commandStream, - CommandQueue &commandQueue, - OCLRT::Kernel &kernel, - const bool &enable); - - static void dispatchProfilingPerfStartCommands( - const OCLRT::DispatchInfo &dispatchInfo, - const MultiDispatchInfo &multiDispatchInfo, - HwTimeStamps *hwTimeStamps, - OCLRT::HwPerfCounter *hwPerfCounter, - OCLRT::LinearStream *commandStream, 
- CommandQueue &commandQueue); - - static void dispatchProfilingPerfEndCommands( - HwTimeStamps *hwTimeStamps, - OCLRT::HwPerfCounter *hwPerfCounter, - OCLRT::LinearStream *commandStream, - CommandQueue &commandQueue); - static void dispatchScheduler( CommandQueue &commandQueue, DeviceQueueHw &devQueueHw, diff --git a/runtime/command_queue/gpgpu_walker.inl b/runtime/command_queue/gpgpu_walker.inl index 3543613454..c2531006c0 100644 --- a/runtime/command_queue/gpgpu_walker.inl +++ b/runtime/command_queue/gpgpu_walker.inl @@ -22,6 +22,8 @@ #pragma once #include "runtime/command_queue/gpgpu_walker.h" +#include "hardware_interface.h" +#include "hardware_interface.inl" #include "runtime/command_queue/command_queue.h" #include "runtime/command_queue/local_id_gen.h" #include "runtime/command_stream/command_stream_receiver.h" @@ -441,218 +443,20 @@ void GpgpuWalkerHelper::dispatchWalker( bool blockQueue, uint32_t commandType) { - OCLRT::LinearStream *commandStream = nullptr; - OCLRT::IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; - auto parentKernel = multiDispatchInfo.peekParentKernel(); - - for (auto &dispatchInfo : multiDispatchInfo) { - // Compute local workgroup sizes - if (dispatchInfo.getLocalWorkgroupSize().x == 0) { - const auto lws = generateWorkgroupSize(dispatchInfo); - const_cast(dispatchInfo).setLWS(lws); - } - } - - // Allocate command stream and indirect heaps - if (blockQueue) { - using KCH = KernelCommandsHelper; - commandStream = new LinearStream(alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize), - MemoryConstants::pageSize); - if (parentKernel) { - uint32_t colorCalcSize = commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize; - - commandQueue.allocateHeapMemory( - IndirectHeap::DYNAMIC_STATE, - commandQueue.getContext().getDefaultDeviceQueue()->getDshBuffer()->getUnderlyingBufferSize(), - dsh); - - dsh->getSpace(colorCalcSize); - ioh = dsh; - commandQueue.allocateHeapMemory(IndirectHeap::SURFACE_STATE, - 
KernelCommandsHelper::template getSizeRequiredForExecutionModel< - IndirectHeap::SURFACE_STATE>(*parentKernel) + - KCH::getTotalSizeRequiredSSH(multiDispatchInfo), - ssh); - } else { - commandQueue.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, KCH::getTotalSizeRequiredDSH(multiDispatchInfo), dsh); - commandQueue.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, KCH::getTotalSizeRequiredIOH(multiDispatchInfo), ioh); - commandQueue.allocateHeapMemory(IndirectHeap::SURFACE_STATE, KCH::getTotalSizeRequiredSSH(multiDispatchInfo), ssh); - } - - using UniqueIH = std::unique_ptr; - *blockedCommandsData = new KernelOperation(std::unique_ptr(commandStream), UniqueIH(dsh), UniqueIH(ioh), - UniqueIH(ssh), *commandQueue.getDevice().getMemoryManager()); - if (parentKernel) { - (*blockedCommandsData)->doNotFreeISH = true; - } - } else { - commandStream = &commandQueue.getCS(0); - if (parentKernel && (commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, 0).getUsed() > 0)) { - commandQueue.releaseIndirectHeap(IndirectHeap::SURFACE_STATE); - } - dsh = &getIndirectHeap(commandQueue, multiDispatchInfo); - ioh = &getIndirectHeap(commandQueue, multiDispatchInfo); - ssh = &getIndirectHeap(commandQueue, multiDispatchInfo); - } - - if (commandQueue.getDevice().getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { - GpgpuWalkerHelper::dispatchOnDeviceWaitlistSemaphores(commandStream, commandQueue.getDevice(), - numEventsInWaitList, eventWaitList); - if (previousTimestampPacket) { - auto compareAddress = previousTimestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd); - KernelCommandsHelper::programMiSemaphoreWait(*commandStream, compareAddress, 1); - } - } - - dsh->align(KernelCommandsHelper::alignInterfaceDescriptorData); - - uint32_t interfaceDescriptorIndex = 0; - const size_t offsetInterfaceDescriptorTable = dsh->getUsed(); - - size_t totalInterfaceDescriptorTableSize = sizeof(INTERFACE_DESCRIPTOR_DATA); - - 
getDefaultDshSpace(offsetInterfaceDescriptorTable, commandQueue, multiDispatchInfo, totalInterfaceDescriptorTableSize, - parentKernel, dsh, commandStream); - - // Program media interface descriptor load - KernelCommandsHelper::sendMediaInterfaceDescriptorLoad( - *commandStream, - offsetInterfaceDescriptorTable, - totalInterfaceDescriptorTableSize); - - DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0); - - size_t currentDispatchIndex = 0; - for (auto &dispatchInfo : multiDispatchInfo) { - auto &kernel = *dispatchInfo.getKernel(); - - DEBUG_BREAK_IF(!(dispatchInfo.getDim() >= 1 && dispatchInfo.getDim() <= 3)); - DEBUG_BREAK_IF(!(dispatchInfo.getGWS().z == 1 || dispatchInfo.getDim() == 3)); - DEBUG_BREAK_IF(!(dispatchInfo.getGWS().y == 1 || dispatchInfo.getDim() >= 2)); - DEBUG_BREAK_IF(!(dispatchInfo.getOffset().z == 0 || dispatchInfo.getDim() == 3)); - DEBUG_BREAK_IF(!(dispatchInfo.getOffset().y == 0 || dispatchInfo.getDim() >= 2)); - - // Determine SIMD size - uint32_t simd = kernel.getKernelInfo().getMaxSimdSize(); - - // If we don't have a required WGS, compute one opportunistically - auto maxWorkGroupSize = static_cast(commandQueue.getDevice().getDeviceInfo().maxWorkGroupSize); - if (commandType == CL_COMMAND_NDRANGE_KERNEL) { - provideLocalWorkGroupSizeHints(commandQueue.getContextPtr(), maxWorkGroupSize, dispatchInfo); - } - - //Get dispatch geometry - uint32_t dim = dispatchInfo.getDim(); - Vec3 gws = dispatchInfo.getGWS(); - Vec3 offset = dispatchInfo.getOffset(); - Vec3 swgs = dispatchInfo.getStartOfWorkgroups(); - - // Compute local workgroup sizes - Vec3 lws = dispatchInfo.getLocalWorkgroupSize(); - Vec3 elws = (dispatchInfo.getEnqueuedWorkgroupSize().x > 0) ? dispatchInfo.getEnqueuedWorkgroupSize() : lws; - - // Compute number of work groups - Vec3 twgs = (dispatchInfo.getTotalNumberOfWorkgroups().x > 0) ? dispatchInfo.getTotalNumberOfWorkgroups() - : generateWorkgroupsNumber(gws, lws); - Vec3 nwgs = (dispatchInfo.getNumberOfWorkgroups().x > 0) ? 
dispatchInfo.getNumberOfWorkgroups() : twgs; - - // Patch our kernel constants - *kernel.globalWorkOffsetX = static_cast(offset.x); - *kernel.globalWorkOffsetY = static_cast(offset.y); - *kernel.globalWorkOffsetZ = static_cast(offset.z); - - *kernel.globalWorkSizeX = static_cast(gws.x); - *kernel.globalWorkSizeY = static_cast(gws.y); - *kernel.globalWorkSizeZ = static_cast(gws.z); - - if ((&kernel == multiDispatchInfo.peekMainKernel()) || (kernel.localWorkSizeX2 == &Kernel::dummyPatchLocation)) { - *kernel.localWorkSizeX = static_cast(lws.x); - *kernel.localWorkSizeY = static_cast(lws.y); - *kernel.localWorkSizeZ = static_cast(lws.z); - } - - *kernel.localWorkSizeX2 = static_cast(lws.x); - *kernel.localWorkSizeY2 = static_cast(lws.y); - *kernel.localWorkSizeZ2 = static_cast(lws.z); - - *kernel.enqueuedLocalWorkSizeX = static_cast(elws.x); - *kernel.enqueuedLocalWorkSizeY = static_cast(elws.y); - *kernel.enqueuedLocalWorkSizeZ = static_cast(elws.z); - - if (&kernel == multiDispatchInfo.peekMainKernel()) { - *kernel.numWorkGroupsX = static_cast(twgs.x); - *kernel.numWorkGroupsY = static_cast(twgs.y); - *kernel.numWorkGroupsZ = static_cast(twgs.z); - } - - *kernel.workDim = dim; - - // Send our indirect object data - size_t localWorkSizes[3] = {lws.x, lws.y, lws.z}; - - dispatchProfilingPerfStartCommands(dispatchInfo, multiDispatchInfo, hwTimeStamps, - hwPerfCounter, commandStream, commandQueue); - - dispatchWorkarounds(commandStream, commandQueue, kernel, true); - - bool setupTimestampPacket = currentTimestampPacket && (currentDispatchIndex == multiDispatchInfo.size() - 1); - if (setupTimestampPacket) { - GpgpuWalkerHelper::setupTimestampPacket(commandStream, nullptr, currentTimestampPacket, - TimestampPacket::WriteOperationType::BeforeWalker); - } - - // Program the walker. 
Invokes execution so all state should already be programmed - auto pWalkerCmd = static_cast *>(commandStream->getSpace(sizeof(WALKER_TYPE))); - *pWalkerCmd = GfxFamily::cmdInitGpgpuWalker; - - if (setupTimestampPacket) { - GpgpuWalkerHelper::setupTimestampPacket(commandStream, pWalkerCmd, currentTimestampPacket, - TimestampPacket::WriteOperationType::AfterWalker); - } - - auto idd = obtainInterfaceDescriptorData(pWalkerCmd); - - auto offsetCrossThreadData = KernelCommandsHelper::sendIndirectState( - *commandStream, - *dsh, - *ioh, - *ssh, - kernel, - simd, - localWorkSizes, - offsetInterfaceDescriptorTable, - interfaceDescriptorIndex, - preemptionMode, - idd); - - size_t globalOffsets[3] = {offset.x, offset.y, offset.z}; - size_t startWorkGroups[3] = {swgs.x, swgs.y, swgs.z}; - size_t numWorkGroups[3] = {nwgs.x, nwgs.y, nwgs.z}; - auto localWorkSize = GpgpuWalkerHelper::setGpgpuWalkerThreadData(pWalkerCmd, globalOffsets, startWorkGroups, - numWorkGroups, localWorkSizes, simd); - - DEBUG_BREAK_IF(offsetCrossThreadData % 64 != 0); - setOffsetCrossThreadData(pWalkerCmd, offsetCrossThreadData, interfaceDescriptorIndex); - - auto threadPayload = kernel.getKernelInfo().patchInfo.threadPayload; - DEBUG_BREAK_IF(nullptr == threadPayload); - - auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload); - auto localIdSizePerThread = PerThreadDataHelper::getLocalIdSizePerThread(simd, numChannels); - localIdSizePerThread = std::max(localIdSizePerThread, sizeof(GRF)); - - auto sizePerThreadDataTotal = getThreadsPerWG(simd, localWorkSize) * localIdSizePerThread; - DEBUG_BREAK_IF(sizePerThreadDataTotal == 0); // Hardware requires at least 1 GRF of perThreadData for each thread in thread group - - auto sizeCrossThreadData = kernel.getCrossThreadDataSize(); - auto IndirectDataLength = alignUp(static_cast(sizeCrossThreadData + sizePerThreadDataTotal), - WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); - pWalkerCmd->setIndirectDataLength(IndirectDataLength); - - 
dispatchWorkarounds(commandStream, commandQueue, kernel, false); - currentDispatchIndex++; - } - dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue); + HARDWARE_INTERFACE hardwareInterface; + hardwareInterface.dispatchWalker( + commandQueue, + multiDispatchInfo, + numEventsInWaitList, + eventWaitList, + blockedCommandsData, + hwTimeStamps, + hwPerfCounter, + previousTimestampPacket, + currentTimestampPacket, + preemptionMode, + blockQueue, + commandType); } template @@ -673,98 +477,6 @@ inline void GpgpuWalkerHelper::dispatchOnDeviceWaitlistSemaphores(Lin } } -template -inline void GpgpuWalkerHelper::getDefaultDshSpace( - const size_t &offsetInterfaceDescriptorTable, - CommandQueue &commandQueue, - const MultiDispatchInfo &multiDispatchInfo, - size_t &totalInterfaceDescriptorTableSize, - OCLRT::Kernel *parentKernel, - OCLRT::IndirectHeap *dsh, - OCLRT::LinearStream *commandStream) { - - size_t numDispatches = multiDispatchInfo.size(); - totalInterfaceDescriptorTableSize *= numDispatches; - - if (!parentKernel) { - dsh->getSpace(totalInterfaceDescriptorTableSize); - } else { - dsh->getSpace(commandQueue.getContext().getDefaultDeviceQueue()->getDshOffset() - dsh->getUsed()); - } -} - -template -inline typename GpgpuWalkerHelper::INTERFACE_DESCRIPTOR_DATA *GpgpuWalkerHelper::obtainInterfaceDescriptorData( - WALKER_HANDLE pCmdData) { - - return nullptr; -} - -template -inline void GpgpuWalkerHelper::setOffsetCrossThreadData( - WALKER_HANDLE pCmdData, - size_t &offsetCrossThreadData, - uint32_t &interfaceDescriptorIndex) { - - WALKER_TYPE *pCmd = static_cast *>(pCmdData); - pCmd->setIndirectDataStartAddress(static_cast(offsetCrossThreadData)); - pCmd->setInterfaceDescriptorOffset(interfaceDescriptorIndex++); -} - -template -inline void GpgpuWalkerHelper::dispatchWorkarounds( - OCLRT::LinearStream *commandStream, - CommandQueue &commandQueue, - OCLRT::Kernel &kernel, - const bool &enable) { - - if (enable) { - 
PreemptionHelper::applyPreemptionWaCmdsBegin(commandStream, commandQueue.getDevice()); - // Implement enabling special WA DisableLSQCROPERFforOCL if needed - GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(commandStream, kernel, enable); - } else { - // Implement disabling special WA DisableLSQCROPERFforOCL if needed - GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(commandStream, kernel, enable); - PreemptionHelper::applyPreemptionWaCmdsEnd(commandStream, commandQueue.getDevice()); - } -} - -template -inline void GpgpuWalkerHelper::dispatchProfilingPerfStartCommands( - const OCLRT::DispatchInfo &dispatchInfo, - const MultiDispatchInfo &multiDispatchInfo, - HwTimeStamps *hwTimeStamps, - OCLRT::HwPerfCounter *hwPerfCounter, - OCLRT::LinearStream *commandStream, - CommandQueue &commandQueue) { - - if (&dispatchInfo == &*multiDispatchInfo.begin()) { - // If hwTimeStampAlloc is passed (not nullptr), then we know that profiling is enabled - if (hwTimeStamps != nullptr) { - GpgpuWalkerHelper::dispatchProfilingCommandsStart(*hwTimeStamps, commandStream); - } - if (hwPerfCounter != nullptr) { - GpgpuWalkerHelper::dispatchPerfCountersCommandsStart(commandQueue, *hwPerfCounter, commandStream); - } - } -} - -template -inline void GpgpuWalkerHelper::dispatchProfilingPerfEndCommands( - HwTimeStamps *hwTimeStamps, - OCLRT::HwPerfCounter *hwPerfCounter, - OCLRT::LinearStream *commandStream, - CommandQueue &commandQueue) { - - // If hwTimeStamps is passed (not nullptr), then we know that profiling is enabled - if (hwTimeStamps != nullptr) { - GpgpuWalkerHelper::dispatchProfilingCommandsEnd(*hwTimeStamps, commandStream); - } - if (hwPerfCounter != nullptr) { - GpgpuWalkerHelper::dispatchPerfCountersCommandsEnd(commandQueue, *hwPerfCounter, commandStream); - } -} - template void GpgpuWalkerHelper::setupTimestampPacket( LinearStream *cmdStream, diff --git a/runtime/command_queue/hardware_interface/hardware_interface.h 
b/runtime/command_queue/hardware_interface/hardware_interface.h new file mode 100644 index 0000000000..75dc3274d6 --- /dev/null +++ b/runtime/command_queue/hardware_interface/hardware_interface.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2018, Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once +#include "runtime/command_queue/command_queue.h" +#include "runtime/built_ins/built_ins.h" +#include "runtime/context/context.h" +#include "runtime/event/perf_counter.h" +#include "runtime/indirect_heap/indirect_heap.h" +#include "runtime/kernel/kernel.h" +#include "runtime/command_stream/linear_stream.h" +#include "runtime/event/hw_timestamps.h" +#include "runtime/command_stream/preemption.h" +#include "runtime/device_queue/device_queue_hw.h" +#include "runtime/helpers/dispatch_info.h" +#include "runtime/helpers/kernel_commands.h" +#include "runtime/helpers/task_information.h" +#include "runtime/helpers/timestamp_packet.h" +#include "runtime/program/kernel_info.h" +#include "runtime/utilities/vec.h" + +namespace OCLRT { + +using WALKER_HANDLE = void *; + +template +class HardwareInterface { + public: + using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; + + void dispatchWalker( + CommandQueue &commandQueue, + const MultiDispatchInfo &multiDispatchInfo, + cl_uint numEventsInWaitList, + const cl_event *eventWaitList, + KernelOperation **blockedCommandsData, + HwTimeStamps *hwTimeStamps, + OCLRT::HwPerfCounter *hwPerfCounter, + TimestampPacket *previousTimestampPacket, + TimestampPacket *currentTimestampPacket, + PreemptionMode preemptionMode, + bool blockQueue, + uint32_t commandType = 0); + + virtual void getDefaultDshSpace( + const size_t &offsetInterfaceDescriptorTable, + CommandQueue &commandQueue, + const MultiDispatchInfo &multiDispatchInfo, + size_t &totalInterfaceDescriptorTableSize, + OCLRT::Kernel *parentKernel, + OCLRT::IndirectHeap *dsh, + OCLRT::LinearStream *commandStream) = 0; + + virtual INTERFACE_DESCRIPTOR_DATA *obtainInterfaceDescriptorData( + WALKER_HANDLE pCmdData) = 0; + + virtual void setOffsetCrossThreadData( + WALKER_HANDLE pCmdData, + size_t &offsetCrossThreadData, + uint32_t &interfaceDescriptorIndex) = 0; + + virtual void dispatchWorkarounds( + OCLRT::LinearStream *commandStream, + 
CommandQueue &commandQueue, + OCLRT::Kernel &kernel, + const bool &enable) = 0; + + virtual void dispatchProfilingPerfStartCommands( + const OCLRT::DispatchInfo &dispatchInfo, + const MultiDispatchInfo &multiDispatchInfo, + HwTimeStamps *hwTimeStamps, + OCLRT::HwPerfCounter *hwPerfCounter, + OCLRT::LinearStream *commandStream, + CommandQueue &commandQueue) = 0; + + virtual void dispatchProfilingPerfEndCommands( + HwTimeStamps *hwTimeStamps, + OCLRT::HwPerfCounter *hwPerfCounter, + OCLRT::LinearStream *commandStream, + CommandQueue &commandQueue) = 0; +}; + +template +class BaseInterfaceVersion : public HardwareInterface { + using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA; + + void getDefaultDshSpace( + const size_t &offsetInterfaceDescriptorTable, + CommandQueue &commandQueue, + const MultiDispatchInfo &multiDispatchInfo, + size_t &totalInterfaceDescriptorTableSize, + OCLRT::Kernel *parentKernel, + OCLRT::IndirectHeap *dsh, + OCLRT::LinearStream *commandStream) override; + + INTERFACE_DESCRIPTOR_DATA *obtainInterfaceDescriptorData( + WALKER_HANDLE pCmdData) override; + + void setOffsetCrossThreadData( + WALKER_HANDLE pCmdData, + size_t &offsetCrossThreadData, + uint32_t &interfaceDescriptorIndex) override; + + void dispatchWorkarounds( + OCLRT::LinearStream *commandStream, + CommandQueue &commandQueue, + OCLRT::Kernel &kernel, + const bool &enable) override; + + void dispatchProfilingPerfStartCommands( + const OCLRT::DispatchInfo &dispatchInfo, + const MultiDispatchInfo &multiDispatchInfo, + HwTimeStamps *hwTimeStamps, + OCLRT::HwPerfCounter *hwPerfCounter, + OCLRT::LinearStream *commandStream, + CommandQueue &commandQueue) override; + + void dispatchProfilingPerfEndCommands( + HwTimeStamps *hwTimeStamps, + OCLRT::HwPerfCounter *hwPerfCounter, + OCLRT::LinearStream *commandStream, + CommandQueue &commandQueue) override; +}; + +} // namespace OCLRT diff --git a/runtime/command_queue/hardware_interface/hardware_interface.inl 
b/runtime/command_queue/hardware_interface/hardware_interface.inl new file mode 100644 index 0000000000..95201240a0 --- /dev/null +++ b/runtime/command_queue/hardware_interface/hardware_interface.inl @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2018, Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once +#include "runtime/command_queue/hardware_interface/hardware_interface.h" + +namespace OCLRT { + +template +void HardwareInterface::dispatchWalker( + CommandQueue &commandQueue, + const MultiDispatchInfo &multiDispatchInfo, + cl_uint numEventsInWaitList, + const cl_event *eventWaitList, + KernelOperation **blockedCommandsData, + HwTimeStamps *hwTimeStamps, + OCLRT::HwPerfCounter *hwPerfCounter, + TimestampPacket *previousTimestampPacket, + TimestampPacket *currentTimestampPacket, + PreemptionMode preemptionMode, + bool blockQueue, + uint32_t commandType) { + + OCLRT::LinearStream *commandStream = nullptr; + OCLRT::IndirectHeap *dsh = nullptr, *ioh = nullptr, *ssh = nullptr; + auto parentKernel = multiDispatchInfo.peekParentKernel(); + + for (auto &dispatchInfo : multiDispatchInfo) { + // Compute local workgroup sizes + if (dispatchInfo.getLocalWorkgroupSize().x == 0) { + const auto lws = generateWorkgroupSize(dispatchInfo); + const_cast(dispatchInfo).setLWS(lws); + } + } + + // Allocate command stream and indirect heaps + if (blockQueue) { + using KCH = KernelCommandsHelper; + commandStream = new LinearStream(alignedMalloc(MemoryConstants::pageSize, MemoryConstants::pageSize), + MemoryConstants::pageSize); + if (parentKernel) { + uint32_t colorCalcSize = commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize; + + commandQueue.allocateHeapMemory( + IndirectHeap::DYNAMIC_STATE, + commandQueue.getContext().getDefaultDeviceQueue()->getDshBuffer()->getUnderlyingBufferSize(), + dsh); + + dsh->getSpace(colorCalcSize); + ioh = dsh; + commandQueue.allocateHeapMemory(IndirectHeap::SURFACE_STATE, + KernelCommandsHelper::template getSizeRequiredForExecutionModel< + IndirectHeap::SURFACE_STATE>(*parentKernel) + + KCH::getTotalSizeRequiredSSH(multiDispatchInfo), + ssh); + } else { + commandQueue.allocateHeapMemory(IndirectHeap::DYNAMIC_STATE, KCH::getTotalSizeRequiredDSH(multiDispatchInfo), dsh); + 
commandQueue.allocateHeapMemory(IndirectHeap::INDIRECT_OBJECT, KCH::getTotalSizeRequiredIOH(multiDispatchInfo), ioh); + commandQueue.allocateHeapMemory(IndirectHeap::SURFACE_STATE, KCH::getTotalSizeRequiredSSH(multiDispatchInfo), ssh); + } + + using UniqueIH = std::unique_ptr; + *blockedCommandsData = new KernelOperation(std::unique_ptr(commandStream), UniqueIH(dsh), UniqueIH(ioh), + UniqueIH(ssh), *commandQueue.getDevice().getMemoryManager()); + if (parentKernel) { + (*blockedCommandsData)->doNotFreeISH = true; + } + } else { + commandStream = &commandQueue.getCS(0); + if (parentKernel && (commandQueue.getIndirectHeap(IndirectHeap::SURFACE_STATE, 0).getUsed() > 0)) { + commandQueue.releaseIndirectHeap(IndirectHeap::SURFACE_STATE); + } + dsh = &getIndirectHeap(commandQueue, multiDispatchInfo); + ioh = &getIndirectHeap(commandQueue, multiDispatchInfo); + ssh = &getIndirectHeap(commandQueue, multiDispatchInfo); + } + + if (commandQueue.getDevice().getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { + GpgpuWalkerHelper::dispatchOnDeviceWaitlistSemaphores(commandStream, commandQueue.getDevice(), + numEventsInWaitList, eventWaitList); + if (previousTimestampPacket) { + auto compareAddress = previousTimestampPacket->pickAddressForDataWrite(TimestampPacket::DataIndex::ContextEnd); + KernelCommandsHelper::programMiSemaphoreWait(*commandStream, compareAddress, 1); + } + } + + dsh->align(KernelCommandsHelper::alignInterfaceDescriptorData); + + uint32_t interfaceDescriptorIndex = 0; + const size_t offsetInterfaceDescriptorTable = dsh->getUsed(); + + size_t totalInterfaceDescriptorTableSize = sizeof(INTERFACE_DESCRIPTOR_DATA); + + getDefaultDshSpace(offsetInterfaceDescriptorTable, commandQueue, multiDispatchInfo, totalInterfaceDescriptorTableSize, + parentKernel, dsh, commandStream); + + // Program media interface descriptor load + KernelCommandsHelper::sendMediaInterfaceDescriptorLoad( + *commandStream, + offsetInterfaceDescriptorTable, + 
totalInterfaceDescriptorTableSize); + + DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0); + + size_t currentDispatchIndex = 0; + for (auto &dispatchInfo : multiDispatchInfo) { + auto &kernel = *dispatchInfo.getKernel(); + + DEBUG_BREAK_IF(!(dispatchInfo.getDim() >= 1 && dispatchInfo.getDim() <= 3)); + DEBUG_BREAK_IF(!(dispatchInfo.getGWS().z == 1 || dispatchInfo.getDim() == 3)); + DEBUG_BREAK_IF(!(dispatchInfo.getGWS().y == 1 || dispatchInfo.getDim() >= 2)); + DEBUG_BREAK_IF(!(dispatchInfo.getOffset().z == 0 || dispatchInfo.getDim() == 3)); + DEBUG_BREAK_IF(!(dispatchInfo.getOffset().y == 0 || dispatchInfo.getDim() >= 2)); + + // Determine SIMD size + uint32_t simd = kernel.getKernelInfo().getMaxSimdSize(); + + // If we don't have a required WGS, compute one opportunistically + auto maxWorkGroupSize = static_cast(commandQueue.getDevice().getDeviceInfo().maxWorkGroupSize); + if (commandType == CL_COMMAND_NDRANGE_KERNEL) { + provideLocalWorkGroupSizeHints(commandQueue.getContextPtr(), maxWorkGroupSize, dispatchInfo); + } + + //Get dispatch geometry + uint32_t dim = dispatchInfo.getDim(); + Vec3 gws = dispatchInfo.getGWS(); + Vec3 offset = dispatchInfo.getOffset(); + Vec3 swgs = dispatchInfo.getStartOfWorkgroups(); + + // Compute local workgroup sizes + Vec3 lws = dispatchInfo.getLocalWorkgroupSize(); + Vec3 elws = (dispatchInfo.getEnqueuedWorkgroupSize().x > 0) ? dispatchInfo.getEnqueuedWorkgroupSize() : lws; + + // Compute number of work groups + Vec3 twgs = (dispatchInfo.getTotalNumberOfWorkgroups().x > 0) ? dispatchInfo.getTotalNumberOfWorkgroups() + : generateWorkgroupsNumber(gws, lws); + Vec3 nwgs = (dispatchInfo.getNumberOfWorkgroups().x > 0) ? 
dispatchInfo.getNumberOfWorkgroups() : twgs; + + // Patch our kernel constants + *kernel.globalWorkOffsetX = static_cast(offset.x); + *kernel.globalWorkOffsetY = static_cast(offset.y); + *kernel.globalWorkOffsetZ = static_cast(offset.z); + + *kernel.globalWorkSizeX = static_cast(gws.x); + *kernel.globalWorkSizeY = static_cast(gws.y); + *kernel.globalWorkSizeZ = static_cast(gws.z); + + if ((&kernel == multiDispatchInfo.peekMainKernel()) || (kernel.localWorkSizeX2 == &Kernel::dummyPatchLocation)) { + *kernel.localWorkSizeX = static_cast(lws.x); + *kernel.localWorkSizeY = static_cast(lws.y); + *kernel.localWorkSizeZ = static_cast(lws.z); + } + + *kernel.localWorkSizeX2 = static_cast(lws.x); + *kernel.localWorkSizeY2 = static_cast(lws.y); + *kernel.localWorkSizeZ2 = static_cast(lws.z); + + *kernel.enqueuedLocalWorkSizeX = static_cast(elws.x); + *kernel.enqueuedLocalWorkSizeY = static_cast(elws.y); + *kernel.enqueuedLocalWorkSizeZ = static_cast(elws.z); + + if (&kernel == multiDispatchInfo.peekMainKernel()) { + *kernel.numWorkGroupsX = static_cast(twgs.x); + *kernel.numWorkGroupsY = static_cast(twgs.y); + *kernel.numWorkGroupsZ = static_cast(twgs.z); + } + + *kernel.workDim = dim; + + // Send our indirect object data + size_t localWorkSizes[3] = {lws.x, lws.y, lws.z}; + + dispatchProfilingPerfStartCommands(dispatchInfo, multiDispatchInfo, hwTimeStamps, + hwPerfCounter, commandStream, commandQueue); + + dispatchWorkarounds(commandStream, commandQueue, kernel, true); + + bool setupTimestampPacket = currentTimestampPacket && (currentDispatchIndex == multiDispatchInfo.size() - 1); + if (setupTimestampPacket) { + GpgpuWalkerHelper::setupTimestampPacket(commandStream, nullptr, currentTimestampPacket, + TimestampPacket::WriteOperationType::BeforeWalker); + } + + // Program the walker. 
Invokes execution so all state should already be programmed + auto pWalkerCmd = static_cast *>(commandStream->getSpace(sizeof(WALKER_TYPE))); + *pWalkerCmd = GfxFamily::cmdInitGpgpuWalker; + + if (setupTimestampPacket) { + GpgpuWalkerHelper::setupTimestampPacket(commandStream, pWalkerCmd, currentTimestampPacket, + TimestampPacket::WriteOperationType::AfterWalker); + } + + auto idd = obtainInterfaceDescriptorData(pWalkerCmd); + + auto offsetCrossThreadData = KernelCommandsHelper::sendIndirectState( + *commandStream, + *dsh, + *ioh, + *ssh, + kernel, + simd, + localWorkSizes, + offsetInterfaceDescriptorTable, + interfaceDescriptorIndex, + preemptionMode, + idd); + + size_t globalOffsets[3] = {offset.x, offset.y, offset.z}; + size_t startWorkGroups[3] = {swgs.x, swgs.y, swgs.z}; + size_t numWorkGroups[3] = {nwgs.x, nwgs.y, nwgs.z}; + auto localWorkSize = GpgpuWalkerHelper::setGpgpuWalkerThreadData(pWalkerCmd, globalOffsets, startWorkGroups, + numWorkGroups, localWorkSizes, simd); + + DEBUG_BREAK_IF(offsetCrossThreadData % 64 != 0); + setOffsetCrossThreadData(pWalkerCmd, offsetCrossThreadData, interfaceDescriptorIndex); + + auto threadPayload = kernel.getKernelInfo().patchInfo.threadPayload; + DEBUG_BREAK_IF(nullptr == threadPayload); + + auto numChannels = PerThreadDataHelper::getNumLocalIdChannels(*threadPayload); + auto localIdSizePerThread = PerThreadDataHelper::getLocalIdSizePerThread(simd, numChannels); + localIdSizePerThread = std::max(localIdSizePerThread, sizeof(GRF)); + + auto sizePerThreadDataTotal = getThreadsPerWG(simd, localWorkSize) * localIdSizePerThread; + DEBUG_BREAK_IF(sizePerThreadDataTotal == 0); // Hardware requires at least 1 GRF of perThreadData for each thread in thread group + + auto sizeCrossThreadData = kernel.getCrossThreadDataSize(); + auto IndirectDataLength = alignUp(static_cast(sizeCrossThreadData + sizePerThreadDataTotal), + WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE); + pWalkerCmd->setIndirectDataLength(IndirectDataLength); + + 
dispatchWorkarounds(commandStream, commandQueue, kernel, false); + currentDispatchIndex++; + } + dispatchProfilingPerfEndCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue); +} + +template +inline void BaseInterfaceVersion::getDefaultDshSpace( + const size_t &offsetInterfaceDescriptorTable, + CommandQueue &commandQueue, + const MultiDispatchInfo &multiDispatchInfo, + size_t &totalInterfaceDescriptorTableSize, + OCLRT::Kernel *parentKernel, + OCLRT::IndirectHeap *dsh, + OCLRT::LinearStream *commandStream) { + + size_t numDispatches = multiDispatchInfo.size(); + totalInterfaceDescriptorTableSize *= numDispatches; + + if (!parentKernel) { + dsh->getSpace(totalInterfaceDescriptorTableSize); + } else { + dsh->getSpace(commandQueue.getContext().getDefaultDeviceQueue()->getDshOffset() - dsh->getUsed()); + } +} + +template +inline typename BaseInterfaceVersion::INTERFACE_DESCRIPTOR_DATA * +BaseInterfaceVersion::obtainInterfaceDescriptorData( + WALKER_HANDLE pCmdData) { + + return nullptr; +} + +template +inline void BaseInterfaceVersion::setOffsetCrossThreadData( + WALKER_HANDLE pCmdData, + size_t &offsetCrossThreadData, + uint32_t &interfaceDescriptorIndex) { + + WALKER_TYPE *pCmd = static_cast *>(pCmdData); + pCmd->setIndirectDataStartAddress(static_cast(offsetCrossThreadData)); + pCmd->setInterfaceDescriptorOffset(interfaceDescriptorIndex++); +} + +template +inline void BaseInterfaceVersion::dispatchWorkarounds( + OCLRT::LinearStream *commandStream, + CommandQueue &commandQueue, + OCLRT::Kernel &kernel, + const bool &enable) { + + if (enable) { + PreemptionHelper::applyPreemptionWaCmdsBegin(commandStream, commandQueue.getDevice()); + // Implement enabling special WA DisableLSQCROPERFforOCL if needed + GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(commandStream, kernel, enable); + } else { + // Implement disabling special WA DisableLSQCROPERFforOCL if needed + GpgpuWalkerHelper::applyWADisableLSQCROPERFforOCL(commandStream, kernel, enable); + 
PreemptionHelper::applyPreemptionWaCmdsEnd(commandStream, commandQueue.getDevice()); + } +} + +template +inline void BaseInterfaceVersion::dispatchProfilingPerfStartCommands( + const OCLRT::DispatchInfo &dispatchInfo, + const MultiDispatchInfo &multiDispatchInfo, + HwTimeStamps *hwTimeStamps, + OCLRT::HwPerfCounter *hwPerfCounter, + OCLRT::LinearStream *commandStream, + CommandQueue &commandQueue) { + + if (&dispatchInfo == &*multiDispatchInfo.begin()) { + // If hwTimeStampAlloc is passed (not nullptr), then we know that profiling is enabled + if (hwTimeStamps != nullptr) { + GpgpuWalkerHelper::dispatchProfilingCommandsStart(*hwTimeStamps, commandStream); + } + if (hwPerfCounter != nullptr) { + GpgpuWalkerHelper::dispatchPerfCountersCommandsStart(commandQueue, *hwPerfCounter, commandStream); + } + } +} + +template +inline void BaseInterfaceVersion::dispatchProfilingPerfEndCommands( + HwTimeStamps *hwTimeStamps, + OCLRT::HwPerfCounter *hwPerfCounter, + OCLRT::LinearStream *commandStream, + CommandQueue &commandQueue) { + + // If hwTimeStamps is passed (not nullptr), then we know that profiling is enabled + if (hwTimeStamps != nullptr) { + GpgpuWalkerHelper::dispatchProfilingCommandsEnd(*hwTimeStamps, commandStream); + } + if (hwPerfCounter != nullptr) { + GpgpuWalkerHelper::dispatchPerfCountersCommandsEnd(commandQueue, *hwPerfCounter, commandStream); + } +} + +} // namespace OCLRT diff --git a/runtime/gen10/hw_cmds.h b/runtime/gen10/hw_cmds.h index 53a1ec5997..eec1b3ac7c 100644 --- a/runtime/gen10/hw_cmds.h +++ b/runtime/gen10/hw_cmds.h @@ -30,14 +30,20 @@ #define TILERESOURCE_CHICKENBIT_VECTOR_BITMASK (1UL << 8) struct CnlParse; namespace OCLRT { + +template +class BaseInterfaceVersion; + struct GEN10 { #include "runtime/gen10/hw_cmds_generated_patched.h" #include "runtime/gen10/hw_cmds_generated.h" }; + struct CNLFamily : public GEN10 { typedef CnlParse PARSE; typedef CNLFamily GfxFamily; typedef GPGPU_WALKER WALKER_TYPE; + using HARDWARE_INTERFACE = 
BaseInterfaceVersion; static const GPGPU_WALKER cmdInitGpgpuWalker; static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData; static const MEDIA_INTERFACE_DESCRIPTOR_LOAD cmdInitMediaInterfaceDescriptorLoad; diff --git a/runtime/gen8/hw_cmds_base.h b/runtime/gen8/hw_cmds_base.h index 853a264366..ca78aa5609 100644 --- a/runtime/gen8/hw_cmds_base.h +++ b/runtime/gen8/hw_cmds_base.h @@ -30,6 +30,10 @@ //forward declaration for parsing logic struct BdwParse; namespace OCLRT { + +template +class BaseInterfaceVersion; + struct GEN8 { #include "runtime/gen8/hw_cmds_generated.h" #include "runtime/gen8/hw_cmds_generated_patched.h" @@ -38,6 +42,7 @@ struct BDWFamily : public GEN8 { typedef BdwParse PARSE; typedef BDWFamily GfxFamily; typedef GPGPU_WALKER WALKER_TYPE; + using HARDWARE_INTERFACE = BaseInterfaceVersion; static const GPGPU_WALKER cmdInitGpgpuWalker; static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData; static const MEDIA_INTERFACE_DESCRIPTOR_LOAD cmdInitMediaInterfaceDescriptorLoad; diff --git a/runtime/gen9/hw_cmds_base.h b/runtime/gen9/hw_cmds_base.h index 2f36b48223..ee65c57dab 100644 --- a/runtime/gen9/hw_cmds_base.h +++ b/runtime/gen9/hw_cmds_base.h @@ -31,6 +31,9 @@ struct SklParse; namespace OCLRT { +template +class BaseInterfaceVersion; + struct GEN9 { #include "runtime/gen9/hw_cmds_generated_patched.h" #include "runtime/gen9/hw_cmds_generated.h" @@ -40,6 +43,7 @@ struct SKLFamily : public GEN9 { typedef SklParse PARSE; typedef SKLFamily GfxFamily; typedef GPGPU_WALKER WALKER_TYPE; + using HARDWARE_INTERFACE = BaseInterfaceVersion; static const GPGPU_WALKER cmdInitGpgpuWalker; static const INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData; static const MEDIA_INTERFACE_DESCRIPTOR_LOAD cmdInitMediaInterfaceDescriptorLoad; diff --git a/unit_tests/libult/mock_gfx_family.h b/unit_tests/libult/mock_gfx_family.h index 5e3bf90782..695bfaf282 100644 --- a/unit_tests/libult/mock_gfx_family.h +++ 
b/unit_tests/libult/mock_gfx_family.h @@ -26,6 +26,9 @@ namespace OCLRT { +template +class BaseInterfaceVersion; + extern HwHelper *hwHelperFactory[IGFX_MAX_CORE]; struct GENX { @@ -358,6 +361,7 @@ struct GENX { inline void setCompareOperation(COMPARE_OPERATION value) {} } MI_SEMAPHORE_WAIT; + using HARDWARE_INTERFACE = BaseInterfaceVersion; typedef GPGPU_WALKER WALKER_TYPE; static GPGPU_WALKER cmdInitGpgpuWalker; static INTERFACE_DESCRIPTOR_DATA cmdInitInterfaceDescriptorData;