From ddb0d82e83ff4859142de4be3c79c0fbda61c407 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Tue, 9 Jul 2019 14:24:33 +0200 Subject: [PATCH] Introduce RegisteredMethodDispatcher. - Inject dispatch methods per DispatchInfo - Each DispatchInfo in MultiDispatchInfo can have different behaviour - Implement AuxTranslation programming with new approach Change-Id: Ie28de0c72a77b8e91509a5b9b8740d72fedf4ad6 Signed-off-by: Dunajski, Bartosz --- runtime/built_ins/aux_translation_builtin.h | 51 ++++++++++- runtime/built_ins/built_ins.inl | 29 ------- runtime/command_queue/command_queue.cpp | 16 ---- runtime/command_queue/command_queue.h | 3 - runtime/command_queue/command_queue_hw.h | 3 + .../command_queue/command_queue_hw_base.inl | 14 +++ runtime/command_queue/gpgpu_walker_base.inl | 8 +- .../command_queue/hardware_interface_base.inl | 16 +--- runtime/helpers/CMakeLists.txt | 1 + runtime/helpers/dispatch_info.h | 10 ++- runtime/helpers/dispatch_info_builder.h | 7 +- .../helpers/registered_method_dispatcher.h | 47 ++++++++++ unit_tests/built_ins/built_in_tests.cpp | 34 ++++---- .../command_queue/dispatch_walker_tests.cpp | 86 +++++++++++++++++-- .../command_queue/enqueue_kernel_2_tests.cpp | 20 +++-- 15 files changed, 241 insertions(+), 104 deletions(-) create mode 100644 runtime/helpers/registered_method_dispatcher.h diff --git a/runtime/built_ins/aux_translation_builtin.h b/runtime/built_ins/aux_translation_builtin.h index 96dd769e7a..efe85b4d0c 100644 --- a/runtime/built_ins/aux_translation_builtin.h +++ b/runtime/built_ins/aux_translation_builtin.h @@ -9,6 +9,7 @@ #include "runtime/built_ins/built_ins.h" #include "runtime/built_ins/builtins_dispatch_builder.h" #include "runtime/helpers/dispatch_info_builder.h" +#include "runtime/helpers/hw_helper.h" #include @@ -17,9 +18,57 @@ template <> class BuiltInOp : public BuiltinDispatchInfoBuilder { public: BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device); - bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override; + template + bool buildDispatchInfosForAuxTranslation(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const { + size_t kernelInstanceNumber = 0; + size_t numMemObjectsToTranslate = operationParams.memObjsForAuxTranslation->size(); + resizeKernelInstances(numMemObjectsToTranslate); + multiDispatchInfo.setBuiltinOpParams(operationParams); + + for (auto &memObj : *operationParams.memObjsForAuxTranslation) { + DispatchInfoBuilder builder; + auto graphicsAllocation = memObj->getGraphicsAllocation(); + size_t allocationSize = alignUp(memObj->getSize(), 512); + + UNRECOVERABLE_IF(builder.getMaxNumDispatches() != 1); + + if (kernelInstanceNumber == 0) { + // Before Kernel + bool dcFlush = (AuxTranslationDirection::AuxToNonAux == operationParams.auxTranslationDirection); + registerPipeControlProgramming(builder.getDispatchInfo(0).dispatchInitCommands, dcFlush); + } + if (kernelInstanceNumber == numMemObjectsToTranslate - 1) { + // After Kernel + registerPipeControlProgramming(builder.getDispatchInfo(0).dispatchEpilogueCommands, false); + } + + if (AuxTranslationDirection::AuxToNonAux == operationParams.auxTranslationDirection) { + builder.setKernel(convertToNonAuxKernel[kernelInstanceNumber++].get()); + builder.setArg(0, memObj); + builder.setArgSvm(1, allocationSize, reinterpret_cast(graphicsAllocation->getGpuAddress()), nullptr, 0u); + } else { + UNRECOVERABLE_IF(AuxTranslationDirection::NonAuxToAux != operationParams.auxTranslationDirection); + builder.setKernel(convertToAuxKernel[kernelInstanceNumber++].get()); + builder.setArgSvm(0, allocationSize, reinterpret_cast(graphicsAllocation->getGpuAddress()), nullptr, 0u); + builder.setArg(1, memObj); + } + + size_t xGws = allocationSize / 16; + + builder.setDispatchGeometry(Vec3{xGws, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); + builder.bake(multiDispatchInfo); + } + + return true; + } protected: + template + void registerPipeControlProgramming(RegisteredMethodDispatcher &dispatcher, bool dcFlush) const { + auto method = std::bind(PipeControlHelper::addPipeControl, std::placeholders::_1, dcFlush); + dispatcher.registerMethod(method); + dispatcher.registerCommandsSizeEstimationMethod(PipeControlHelper::getSizeForSinglePipeControl); + } void resizeKernelInstances(size_t size) const; Kernel *baseKernel = nullptr; mutable std::vector> convertToNonAuxKernel; diff --git a/runtime/built_ins/built_ins.inl b/runtime/built_ins/built_ins.inl index 72e36888bd..39f3e7b28d 100644 --- a/runtime/built_ins/built_ins.inl +++ b/runtime/built_ins/built_ins.inl @@ -23,35 +23,6 @@ BuiltInOp::BuiltInOp(BuiltIns &kernelsLib, Context resizeKernelInstances(5); } -bool BuiltInOp::buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const { - size_t kernelInstanceNumber = 0; - resizeKernelInstances(operationParams.memObjsForAuxTranslation->size()); - multiDispatchInfo.setBuiltinOpParams(operationParams); - for (auto &memObj : *operationParams.memObjsForAuxTranslation) { - DispatchInfoBuilder builder; - auto graphicsAllocation = memObj->getGraphicsAllocation(); - size_t allocationSize = alignUp(memObj->getSize(), 512); - - if (AuxTranslationDirection::AuxToNonAux == operationParams.auxTranslationDirection) { - builder.setKernel(convertToNonAuxKernel.at(kernelInstanceNumber++).get()); - builder.setArg(0, memObj); - builder.setArgSvm(1, allocationSize, reinterpret_cast(graphicsAllocation->getGpuAddress()), nullptr, 0u); - } else { - UNRECOVERABLE_IF(AuxTranslationDirection::NonAuxToAux != operationParams.auxTranslationDirection); - builder.setKernel(convertToAuxKernel.at(kernelInstanceNumber++).get()); - builder.setArgSvm(0, allocationSize, reinterpret_cast(graphicsAllocation->getGpuAddress()), nullptr, 0u); - builder.setArg(1, memObj); - } - - size_t xGws = allocationSize / 16; - - builder.setDispatchGeometry(Vec3{xGws, 0, 0}, Vec3{0, 0, 0}, Vec3{0, 0, 0}); - builder.bake(multiDispatchInfo); - } - - return true; -} - void BuiltInOp::resizeKernelInstances(size_t size) const { convertToNonAuxKernel.reserve(size); convertToAuxKernel.reserve(size); diff --git a/runtime/command_queue/command_queue.cpp b/runtime/command_queue/command_queue.cpp index 3c3204d97a..f351fdbe5d 100644 --- a/runtime/command_queue/command_queue.cpp +++ b/runtime/command_queue/command_queue.cpp @@ -521,22 +521,6 @@ void CommandQueue::releaseIndirectHeap(IndirectHeap::Type heapType) { getCommandStreamReceiver().releaseIndirectHeap(heapType); } -void CommandQueue::dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation, - AuxTranslationDirection auxTranslationDirection) { - if (!multiDispatchInfo.empty()) { - multiDispatchInfo.rbegin()->setPipeControlRequired(true); - } - auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getContext(), getDevice()); - BuiltinOpParams dispatchParams; - - dispatchParams.memObjsForAuxTranslation = &memObjsForAuxTranslation; - dispatchParams.auxTranslationDirection = auxTranslationDirection; - - builder.buildDispatchInfos(multiDispatchInfo, dispatchParams); - - multiDispatchInfo.rbegin()->setPipeControlRequired(true); -} - void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies) { auto allocator = getCommandStreamReceiver().getTimestampPacketAllocator(); diff --git a/runtime/command_queue/command_queue.h b/runtime/command_queue/command_queue.h index 6cb46b4162..e15efc0a24 100644 --- a/runtime/command_queue/command_queue.h +++ b/runtime/command_queue/command_queue.h @@ -429,9 +429,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> { virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType){}; - MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation, - AuxTranslationDirection auxTranslationDirection); - MOCKABLE_VIRTUAL void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies); void processProperties(const cl_queue_properties *properties); bool bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr, diff --git a/runtime/command_queue/command_queue_hw.h b/runtime/command_queue/command_queue_hw.h index bcf2904f3b..63f3e27b04 100644 --- a/runtime/command_queue/command_queue_hw.h +++ b/runtime/command_queue/command_queue_hw.h @@ -382,6 +382,9 @@ class CommandQueueHw : public CommandQueue { cl_int enqueueMarkerForReadWriteOperation(MemObj *memObj, void *ptr, cl_command_type commandType, cl_bool blocking, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); + MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation, + AuxTranslationDirection auxTranslationDirection); + private: bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType); void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override; diff --git a/runtime/command_queue/command_queue_hw_base.inl b/runtime/command_queue/command_queue_hw_base.inl index 01eb302b46..d0288da2fc 100644 --- a/runtime/command_queue/command_queue_hw_base.inl +++ b/runtime/command_queue/command_queue_hw_base.inl @@ -5,6 +5,7 @@ * */ +#include "runtime/built_ins/aux_translation_builtin.h" #include "runtime/command_queue/enqueue_barrier.h" #include "runtime/command_queue/enqueue_copy_buffer.h" #include "runtime/command_queue/enqueue_copy_buffer_rect.h" @@ -99,4 +100,17 @@ cl_int CommandQueueHw::enqueueMarkerForReadWriteOperation(MemObj *memObj return CL_SUCCESS; } + +template +void CommandQueueHw::dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation, + AuxTranslationDirection auxTranslationDirection) { + auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getContext(), getDevice()); + auto &auxTranslationBuilder = static_cast &>(builder); + BuiltinOpParams dispatchParams; + + dispatchParams.memObjsForAuxTranslation = &memObjsForAuxTranslation; + dispatchParams.auxTranslationDirection = auxTranslationDirection; + + auxTranslationBuilder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, dispatchParams); +} } // namespace NEO diff --git a/runtime/command_queue/gpgpu_walker_base.inl b/runtime/command_queue/gpgpu_walker_base.inl index 35aafea023..407a80fb3e 100644 --- a/runtime/command_queue/gpgpu_walker_base.inl +++ b/runtime/command_queue/gpgpu_walker_base.inl @@ -196,14 +196,10 @@ size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, c size_t expectedSizeCS = 0; Kernel *parentKernel = multiDispatchInfo.peekParentKernel(); - if (multiDispatchInfo.peekMainKernel() && multiDispatchInfo.peekMainKernel()->isAuxTranslationRequired()) { - expectedSizeCS += sizeof(PIPE_CONTROL); - } for (auto &dispatchInfo : multiDispatchInfo) { expectedSizeCS += EnqueueOperation::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel()); - if (dispatchInfo.isPipeControlRequired()) { - expectedSizeCS += sizeof(PIPE_CONTROL); - } + expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(); + expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(); } if (parentKernel) { SchedulerKernel &scheduler = commandQueue.getDevice().getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(parentKernel->getContext()); diff --git a/runtime/command_queue/hardware_interface_base.inl b/runtime/command_queue/hardware_interface_base.inl index f1e497e2d2..2fc03cdebc 100644 --- a/runtime/command_queue/hardware_interface_base.inl +++ b/runtime/command_queue/hardware_interface_base.inl @@ -114,18 +114,11 @@ void HardwareInterface::dispatchWalker( DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0); - if (mainKernel->isAuxTranslationRequired()) { - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - auto pPipeControlCmd = static_cast(commandStream->getSpace(sizeof(PIPE_CONTROL))); - *pPipeControlCmd = GfxFamily::cmdInitPipeControl; - pPipeControlCmd->setDcFlushEnable(true); - pPipeControlCmd->setCommandStreamerStallEnable(true); - } - dispatchProfilingPerfStartCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue); size_t currentDispatchIndex = 0; for (auto &dispatchInfo : multiDispatchInfo) { + dispatchInfo.dispatchInitCommands(*commandStream); auto &kernel = *dispatchInfo.getKernel(); DEBUG_BREAK_IF(!(dispatchInfo.getDim() >= 1 && dispatchInfo.getDim() <= 3)); DEBUG_BREAK_IF(!(dispatchInfo.getGWS().z == 1 || dispatchInfo.getDim() == 3)); @@ -203,14 +196,9 @@ void HardwareInterface::dispatchWalker( offsetInterfaceDescriptorTable, numberOfWorkgroups, startOfWorkgroups); dispatchWorkarounds(commandStream, commandQueue, kernel, false); - if (dispatchInfo.isPipeControlRequired()) { - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - auto pPipeControlCmd = static_cast(commandStream->getSpace(sizeof(PIPE_CONTROL))); - *pPipeControlCmd = GfxFamily::cmdInitPipeControl; - pPipeControlCmd->setCommandStreamerStallEnable(true); - } currentDispatchIndex++; + dispatchInfo.dispatchEpilogueCommands(*commandStream); } if (mainKernel->requiresCacheFlushCommand(commandQueue)) { uint64_t postSyncAddress = 0; diff --git a/runtime/helpers/CMakeLists.txt b/runtime/helpers/CMakeLists.txt index 5b6c88ea69..def5281d29 100644 --- a/runtime/helpers/CMakeLists.txt +++ b/runtime/helpers/CMakeLists.txt @@ -73,6 +73,7 @@ set(RUNTIME_SRCS_HELPERS_BASE ${CMAKE_CURRENT_SOURCE_DIR}/properties_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/queue_helpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/queue_helpers.h + ${CMAKE_CURRENT_SOURCE_DIR}/registered_method_dispatcher.h ${CMAKE_CURRENT_SOURCE_DIR}/sampler_helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/state_base_address.h ${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_base.inl diff --git a/runtime/helpers/dispatch_info.h b/runtime/helpers/dispatch_info.h index e36e9b7dca..007b33a9b6 100644 --- a/runtime/helpers/dispatch_info.h +++ b/runtime/helpers/dispatch_info.h @@ -9,6 +9,7 @@ #include "core/helpers/vec.h" #include "runtime/built_ins/builtins_dispatch_builder.h" +#include "runtime/helpers/registered_method_dispatcher.h" #include "runtime/mem_obj/mem_obj.h" #include "runtime/memory_manager/surface.h" #include "runtime/utilities/stackvec.h" @@ -21,14 +22,15 @@ namespace NEO { class Kernel; class DispatchInfo { + public: + using DispatchCommandMethodT = void(LinearStream &commandStream); + DispatchInfo() = default; DispatchInfo(Kernel *kernel, uint32_t dim, Vec3 gws, Vec3 elws, Vec3 offset) : kernel(kernel), dim(dim), gws(gws), elws(elws), offset(offset) {} DispatchInfo(Kernel *kernel, uint32_t dim, Vec3 gws, Vec3 elws, Vec3 offset, Vec3 agws, Vec3 lws, Vec3 twgs, Vec3 nwgs, Vec3 swgs) : kernel(kernel), dim(dim), gws(gws), elws(elws), offset(offset), agws(agws), lws(lws), twgs(twgs), nwgs(nwgs), swgs(swgs) {} - bool isPipeControlRequired() const { return pipeControlRequired; } - void setPipeControlRequired(bool blocking) { this->pipeControlRequired = blocking; } bool usesSlm() const; bool usesStatelessPrintfSurface() const; uint32_t getRequiredScratchSize() const; @@ -56,8 +58,10 @@ class DispatchInfo { bool peekCanBePartitioned() const { return canBePartitioned; } void setCanBePartitioned(bool canBePartitioned) { this->canBePartitioned = canBePartitioned; } + RegisteredMethodDispatcher dispatchInitCommands; + RegisteredMethodDispatcher dispatchEpilogueCommands; + protected: - bool pipeControlRequired = false; bool canBePartitioned = false; Kernel *kernel = nullptr; uint32_t dim = 0; diff --git a/runtime/helpers/dispatch_info_builder.h b/runtime/helpers/dispatch_info_builder.h index 33cecc4cb4..f2ac92b085 100644 --- a/runtime/helpers/dispatch_info_builder.h +++ b/runtime/helpers/dispatch_info_builder.h @@ -279,6 +279,9 @@ class DispatchInfoBuilder { } } + DispatchInfo &getDispatchInfo(size_t index) { return dispatchInfos[index]; } + static constexpr size_t getMaxNumDispatches() { return numDispatches; } + protected: static bool supportsSplit() { return (Mode == SplitDispatch::SplitMode::WalkerSplit); @@ -420,10 +423,6 @@ class DispatchInfoBuilder { return static_cast(x); } - static constexpr size_t getMaxNumDispatches() { - return numDispatches; - } - static const size_t numDispatches = (Mode == SplitDispatch::SplitMode::WalkerSplit) ? 1 : powConst((static_cast(Mode) + 1), // 1 (middle) 2 (middle + right/bottom) or 3 (lef/top + middle + right/mottom) (static_cast(Dim) + 1)); // 1, 2 or 3 diff --git a/runtime/helpers/registered_method_dispatcher.h b/runtime/helpers/registered_method_dispatcher.h new file mode 100644 index 0000000000..dd33b2689a --- /dev/null +++ b/runtime/helpers/registered_method_dispatcher.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2017-2019 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include +#include + +namespace NEO { +template +class RegisteredMethodDispatcher { + public: + using CommandsSizeEstimationMethodT = std::function; + using RegisteredMethodT = std::function; + + void registerMethod(RegisteredMethodT method) { + this->method = method; + } + + void registerCommandsSizeEstimationMethod(CommandsSizeEstimationMethodT method) { + this->commandsEstimationMethod = method; + } + + template + void operator()(Args &&... args) const { + if (method) { + method(std::forward(args)...); + } + } + + size_t estimateCommandsSize() const { + if (commandsEstimationMethod) { + return commandsEstimationMethod(); + } + return 0; + } + + protected: + CommandsSizeEstimationMethodT commandsEstimationMethod; + RegisteredMethodT method; +}; + +} // namespace NEO diff --git a/unit_tests/built_ins/built_in_tests.cpp b/unit_tests/built_ins/built_in_tests.cpp index 6d7e5e818f..75ffe7b077 100644 --- a/unit_tests/built_ins/built_in_tests.cpp +++ b/unit_tests/built_ins/built_in_tests.cpp @@ -246,8 +246,9 @@ TEST_F(BuiltInTests, BuiltinDispatchInfoBuilderCopyBufferToBuffer) { delete dstPtr; } -TEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) { - BuiltinDispatchInfoBuilder &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice); +HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) { + BuiltinDispatchInfoBuilder &baseBuilder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice); + auto &builder = static_cast &>(baseBuilder); MemObjsForAuxTranslation memObjsForAuxTranslation; MultiDispatchInfo multiDispatchInfo; @@ -265,7 +266,7 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTransla memObjsForAuxTranslation.insert(&buffer); } - EXPECT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams)); + EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams)); EXPECT_EQ(3u, multiDispatchInfo.size()); for (auto &dispatchInfo : multiDispatchInfo) { @@ -294,8 +295,9 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTransla EXPECT_NE(builtinKernels[1], builtinKernels[2]); } -TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) { - BuiltinDispatchInfoBuilder &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice); +HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) { + BuiltinDispatchInfoBuilder &baseBuilder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice); + auto &builder = static_cast &>(baseBuilder); MemObjsForAuxTranslation memObjsForAuxTranslation; MultiDispatchInfo multiDispatchInfo; @@ -313,7 +315,7 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslatio memObjsForAuxTranslation.insert(&buffer); } - EXPECT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams)); + EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams)); EXPECT_EQ(3u, multiDispatchInfo.size()); for (auto &dispatchInfo : multiDispatchInfo) { @@ -342,8 +344,9 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslatio EXPECT_NE(builtinKernels[1], builtinKernels[2]); } -TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickDifferentKernelsDependingOnRequest) { - BuiltinDispatchInfoBuilder &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice); +HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickDifferentKernelsDependingOnRequest) { + BuiltinDispatchInfoBuilder &baseBuilder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice); + auto &builder = static_cast &>(baseBuilder); MemObjsForAuxTranslation memObjsForAuxTranslation; MockBuffer mockBuffer[3]; @@ -358,10 +361,10 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickD } builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; - EXPECT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams)); + EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams)); builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux; - EXPECT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams)); + EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams)); EXPECT_EQ(6u, multiDispatchInfo.size()); @@ -376,8 +379,9 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickD EXPECT_NE(builtinKernels[2], builtinKernels[5]); } -TEST_F(BuiltInTests, givenInvalidAuxTranslationDirectionWhenBuildingDispatchInfosThenAbort) { - BuiltinDispatchInfoBuilder &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice); +HWTEST_F(BuiltInTests, givenInvalidAuxTranslationDirectionWhenBuildingDispatchInfosThenAbort) { + BuiltinDispatchInfoBuilder &baseBuilder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice); + auto &builder = static_cast &>(baseBuilder); MemObjsForAuxTranslation memObjsForAuxTranslation; MockBuffer mockBuffer; @@ -389,7 +393,7 @@ TEST_F(BuiltInTests, givenInvalidAuxTranslationDirectionWhenBuildingDispatchInfo memObjsForAuxTranslation.insert(&mockBuffer); builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::None; - EXPECT_THROW(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams), std::exception); + EXPECT_THROW(builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams), std::exception); } class MockAuxBuilInOp : public BuiltInOp { @@ -411,7 +415,7 @@ TEST_F(BuiltInTests, whenAuxBuiltInIsConstructedThenResizeKernelInstancedTo5) { EXPECT_EQ(5u, mockAuxBuiltInOp.convertToNonAuxKernel.size()); } -TEST_F(BuiltInTests, givenMoreBuffersForAuxTranslationThanKernelInstancesWhenDispatchingThenResize) { +HWTEST_F(BuiltInTests, givenMoreBuffersForAuxTranslationThanKernelInstancesWhenDispatchingThenResize) { MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pContext, *pDevice); EXPECT_EQ(5u, mockAuxBuiltInOp.convertToAuxKernel.size()); EXPECT_EQ(5u, mockAuxBuiltInOp.convertToNonAuxKernel.size()); @@ -428,7 +432,7 @@ TEST_F(BuiltInTests, givenMoreBuffersForAuxTranslationThanKernelInstancesWhenDis memObjsForAuxTranslation.insert(&buffer); } - EXPECT_TRUE(mockAuxBuiltInOp.buildDispatchInfos(multiDispatchInfo, builtinOpsParams)); + EXPECT_TRUE(mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams)); EXPECT_EQ(7u, mockAuxBuiltInOp.convertToAuxKernel.size()); EXPECT_EQ(7u, mockAuxBuiltInOp.convertToNonAuxKernel.size()); } diff --git a/unit_tests/command_queue/dispatch_walker_tests.cpp b/unit_tests/command_queue/dispatch_walker_tests.cpp index 6b38853bdc..08c579f7fa 100644 --- a/unit_tests/command_queue/dispatch_walker_tests.cpp +++ b/unit_tests/command_queue/dispatch_walker_tests.cpp @@ -5,6 +5,7 @@ * */ +#include "runtime/built_ins/aux_translation_builtin.h" #include "runtime/command_queue/gpgpu_walker.h" #include "runtime/command_queue/hardware_interface.h" #include "runtime/event/perf_counter.h" @@ -18,6 +19,7 @@ #include "unit_tests/fixtures/device_fixture.h" #include "unit_tests/helpers/debug_manager_state_restore.h" #include "unit_tests/helpers/hw_parse.h" +#include "unit_tests/mocks/mock_buffer.h" #include "unit_tests/mocks/mock_command_queue.h" #include "unit_tests/mocks/mock_graphics_allocation.h" #include "unit_tests/mocks/mock_kernel.h" @@ -1266,7 +1268,13 @@ TEST(DispatchWalker, calculateDispatchDim) { } } -HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxTranslationRequiredThenPipeControlWithStallAndDCFlushAdded) { +HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxToNonAuxWhenTranslationRequiredThenPipeControlWithStallAndDCFlushAdded) { + MockContext context; + auto executionEnvironment = pDevice->getExecutionEnvironment(); + auto builtIns = executionEnvironment->getBuiltIns(); + BuiltinDispatchInfoBuilder &baseBuilder = builtIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, context, *pDevice); + auto &builder = static_cast &>(baseBuilder); + MockKernel kernel(program.get(), kernelInfo, *pDevice); kernelInfo.workloadInfo.workDimOffset = 0; ASSERT_EQ(CL_SUCCESS, kernel.initialize()); @@ -1274,11 +1282,18 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxTranslationRequiredThenPipeContro auto &cmdStream = pCmdQ->getCS(0); void *buffer = cmdStream.getCpuBase(); kernel.auxTranslationRequired = true; + MockBuffer mockBuffer[2]; - MockMultiDispatchInfo multiDispatchInfo(&kernel); - DispatchInfo di1(&kernel, 1, Vec3(1, 1, 1), Vec3(1, 1, 1), Vec3(0, 0, 0)); - di1.setPipeControlRequired(true); - multiDispatchInfo.push(di1); + MultiDispatchInfo multiDispatchInfo; + MemObjsForAuxTranslation memObjsForAuxTranslation; + memObjsForAuxTranslation.insert(&mockBuffer[0]); + memObjsForAuxTranslation.insert(&mockBuffer[1]); + + BuiltinOpParams builtinOpsParams; + builtinOpsParams.memObjsForAuxTranslation = &memObjsForAuxTranslation; + builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; + + builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams); HardwareInterface::dispatchWalker( *pCmdQ, @@ -1305,7 +1320,66 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxTranslationRequiredThenPipeContro EXPECT_TRUE(beginPipeControl->getCommandStreamerStallEnable()); auto endPipeControl = genCmdCast(*(pipeControls[1])); - EXPECT_FALSE(endPipeControl->getDcFlushEnable()); + bool dcFlushRequired = (executionEnvironment->getHardwareInfo()->platform.eRenderCoreFamily == IGFX_GEN8_CORE); + EXPECT_EQ(dcFlushRequired, endPipeControl->getDcFlushEnable()); + EXPECT_TRUE(endPipeControl->getCommandStreamerStallEnable()); +} + +HWTEST_F(DispatchWalkerTest, givenKernelWhenNonAuxToAuxWhenTranslationRequiredThenPipeControlWithStallAdded) { + MockContext context; + auto executionEnvironment = pDevice->getExecutionEnvironment(); + auto builtIns = executionEnvironment->getBuiltIns(); + BuiltinDispatchInfoBuilder &baseBuilder = builtIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, context, *pDevice); + auto &builder = static_cast &>(baseBuilder); + + MockKernel kernel(program.get(), kernelInfo, *pDevice); + kernelInfo.workloadInfo.workDimOffset = 0; + ASSERT_EQ(CL_SUCCESS, kernel.initialize()); + + auto &cmdStream = pCmdQ->getCS(0); + void *buffer = cmdStream.getCpuBase(); + kernel.auxTranslationRequired = true; + MockBuffer mockBuffer[2]; + + MultiDispatchInfo multiDispatchInfo; + MemObjsForAuxTranslation memObjsForAuxTranslation; + memObjsForAuxTranslation.insert(&mockBuffer[0]); + memObjsForAuxTranslation.insert(&mockBuffer[1]); + + BuiltinOpParams builtinOpsParams; + builtinOpsParams.memObjsForAuxTranslation = &memObjsForAuxTranslation; + builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux; + + builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams); + + HardwareInterface::dispatchWalker( + *pCmdQ, + multiDispatchInfo, + CsrDependencies(), + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + pDevice->getPreemptionMode(), + false); + + auto sizeUsed = cmdStream.getUsed(); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, buffer, sizeUsed)); + + auto pipeControls = findAll(cmdList.begin(), cmdList.end()); + + ASSERT_EQ(2u, pipeControls.size()); + + bool dcFlushRequired = (executionEnvironment->getHardwareInfo()->platform.eRenderCoreFamily == IGFX_GEN8_CORE); + + auto beginPipeControl = genCmdCast(*(pipeControls[0])); + EXPECT_EQ(dcFlushRequired, beginPipeControl->getDcFlushEnable()); + EXPECT_TRUE(beginPipeControl->getCommandStreamerStallEnable()); + + auto endPipeControl = genCmdCast(*(pipeControls[1])); + EXPECT_EQ(dcFlushRequired, endPipeControl->getDcFlushEnable()); EXPECT_TRUE(endPipeControl->getCommandStreamerStallEnable()); } diff --git a/unit_tests/command_queue/enqueue_kernel_2_tests.cpp b/unit_tests/command_queue/enqueue_kernel_2_tests.cpp index 5626335181..ad15683e2b 100644 --- a/unit_tests/command_queue/enqueue_kernel_2_tests.cpp +++ b/unit_tests/command_queue/enqueue_kernel_2_tests.cpp @@ -655,6 +655,7 @@ struct EnqueueAuxKernelTests : public EnqueueKernelTest { template class MyCmdQ : public CommandQueueHw { public: + using CommandQueueHw::commandStream; MyCmdQ(Context *context, Device *device) : CommandQueueHw(context, device, nullptr) {} void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation, AuxTranslationDirection auxTranslationDirection) override { @@ -733,14 +734,19 @@ HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThe EXPECT_EQ(&buffer2, *std::get(cmdQ.dispatchAuxTranslationInputs.at(0)).begin()); EXPECT_EQ(&buffer2, *std::get(cmdQ.dispatchAuxTranslationInputs.at(1)).begin()); - uint32_t pipeControlCount = 0; - for (auto dispatchInfo : cmdQ.dispatchInfos) { - if (dispatchInfo.isPipeControlRequired()) { - ++pipeControlCount; - } - } - EXPECT_EQ(4u, pipeControlCount); + auto cmdStream = cmdQ.commandStream; + auto sizeUsed = cmdStream->getUsed(); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), sizeUsed)); + + auto pipeControls = findAll(cmdList.begin(), cmdList.end()); + + auto additionalPcCount = PipeControlHelper::getSizeForPipeControlWithPostSyncOperation() / sizeof(typename FamilyType::PIPE_CONTROL); + + // |AuxToNonAux|NDR|NonAuxToAux| + ASSERT_EQ(4u + additionalPcCount, pipeControls.size()); + ASSERT_EQ(2u, cmdQ.auxTranslationDirections.size()); EXPECT_EQ(AuxTranslationDirection::AuxToNonAux, cmdQ.auxTranslationDirections[0]); EXPECT_EQ(AuxTranslationDirection::NonAuxToAux, cmdQ.auxTranslationDirections[1]);