Introduce RegisteredMethodDispatcher.

- Inject dispatch methods per DispatchInfo
- Each DispatchInfo in MultiDispatchInfo can have different behaviour
- Implement AuxTranslation programming with new approach

Change-Id: Ie28de0c72a77b8e91509a5b9b8740d72fedf4ad6
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2019-07-09 14:24:33 +02:00
committed by sys_ocldev
parent a72109d209
commit ddb0d82e83
15 changed files with 241 additions and 104 deletions

View File

@ -9,6 +9,7 @@
#include "runtime/built_ins/built_ins.h"
#include "runtime/built_ins/builtins_dispatch_builder.h"
#include "runtime/helpers/dispatch_info_builder.h"
#include "runtime/helpers/hw_helper.h"
#include <memory>
@ -17,9 +18,57 @@ template <>
class BuiltInOp<EBuiltInOps::AuxTranslation> : public BuiltinDispatchInfoBuilder {
public:
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device);
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override;
// Appends one 1D, non-split dispatch to `multiDispatchInfo` for every memory object listed in
// `operationParams.memObjsForAuxTranslation`.
//
// Pipe-control programming is registered in the init commands of the first dispatch (DC flush is
// requested only for the AuxToNonAux direction) and in the epilogue commands of the last dispatch
// (without DC flush). UNRECOVERABLE_IF fires when the direction is neither AuxToNonAux nor
// NonAuxToAux. Always returns true.
template <typename GfxFamily>
bool buildDispatchInfosForAuxTranslation(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const {
    using AuxBuilder = DispatchInfoBuilder<SplitDispatch::Dim::d1D, SplitDispatch::SplitMode::NoSplit>;

    const size_t memObjCount = operationParams.memObjsForAuxTranslation->size();
    const bool toNonAux = (AuxTranslationDirection::AuxToNonAux == operationParams.auxTranslationDirection);

    // One kernel instance is needed per translated object.
    resizeKernelInstances(memObjCount);
    multiDispatchInfo.setBuiltinOpParams(operationParams);

    size_t kernelIndex = 0;
    for (auto &memObj : *operationParams.memObjsForAuxTranslation) {
        AuxBuilder builder;
        auto allocation = memObj->getGraphicsAllocation();
        const size_t alignedSize = alignUp(memObj->getSize(), 512);

        // The hooks below are attached to dispatch 0, so the builder must hold exactly one.
        UNRECOVERABLE_IF(builder.getMaxNumDispatches() != 1);

        const bool firstDispatch = (kernelIndex == 0);
        const bool lastDispatch = (kernelIndex + 1 == memObjCount);

        if (firstDispatch) {
            // Programmed ahead of the first translation kernel
            registerPipeControlProgramming<GfxFamily>(builder.getDispatchInfo(0).dispatchInitCommands, toNonAux);
        }
        if (lastDispatch) {
            // Programmed behind the last translation kernel
            registerPipeControlProgramming<GfxFamily>(builder.getDispatchInfo(0).dispatchEpilogueCommands, false);
        }

        // The two directions use different kernels and swap the roles of argument 0 and 1.
        if (toNonAux) {
            builder.setKernel(convertToNonAuxKernel[kernelIndex].get());
            builder.setArg(0, memObj);
            builder.setArgSvm(1, alignedSize, reinterpret_cast<void *>(allocation->getGpuAddress()), nullptr, 0u);
        } else {
            UNRECOVERABLE_IF(AuxTranslationDirection::NonAuxToAux != operationParams.auxTranslationDirection);
            builder.setKernel(convertToAuxKernel[kernelIndex].get());
            builder.setArgSvm(0, alignedSize, reinterpret_cast<void *>(allocation->getGpuAddress()), nullptr, 0u);
            builder.setArg(1, memObj);
        }
        ++kernelIndex;

        // NOTE(review): presumably each work item handles 16 bytes - confirm against the kernel source.
        const size_t globalWorkSizeX = alignedSize / 16;
        builder.setDispatchGeometry(Vec3<size_t>{globalWorkSizeX, 0, 0}, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0});
        builder.bake(multiDispatchInfo);
    }
    return true;
}
protected:
// Registers, on `dispatcher`, both the pipe-control programming callback (bound to the given
// `dcFlush` value) and the matching command-size estimation callback for GfxFamily.
template <typename GfxFamily>
void registerPipeControlProgramming(RegisteredMethodDispatcher<DispatchInfo::DispatchCommandMethodT> &dispatcher, bool dcFlush) const {
    using PipeControl = PipeControlHelper<GfxFamily>;

    // The command stream is supplied at call time; dcFlush is fixed at registration time.
    dispatcher.registerMethod(std::bind(PipeControl::addPipeControl, std::placeholders::_1, dcFlush));
    dispatcher.registerCommandsSizeEstimationMethod(PipeControl::getSizeForSinglePipeControl);
}
void resizeKernelInstances(size_t size) const;
Kernel *baseKernel = nullptr;
mutable std::vector<std::unique_ptr<Kernel>> convertToNonAuxKernel;

View File

@ -23,35 +23,6 @@ BuiltInOp<EBuiltInOps::AuxTranslation>::BuiltInOp(BuiltIns &kernelsLib, Context
resizeKernelInstances(5);
}
// Appends one 1D, non-split dispatch to `multiDispatchInfo` for every memory object listed in
// `operationParams.memObjsForAuxTranslation`, selecting the kernel set that matches the requested
// translation direction. UNRECOVERABLE_IF fires when the direction is neither AuxToNonAux nor
// NonAuxToAux. Always returns true.
bool BuiltInOp<EBuiltInOps::AuxTranslation>::buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const {
    using AuxBuilder = DispatchInfoBuilder<SplitDispatch::Dim::d1D, SplitDispatch::SplitMode::NoSplit>;

    auto &memObjs = *operationParams.memObjsForAuxTranslation;
    const bool toNonAux = (AuxTranslationDirection::AuxToNonAux == operationParams.auxTranslationDirection);

    resizeKernelInstances(memObjs.size());
    multiDispatchInfo.setBuiltinOpParams(operationParams);

    size_t kernelIndex = 0;
    for (auto &memObj : memObjs) {
        AuxBuilder builder;
        auto allocation = memObj->getGraphicsAllocation();
        const size_t alignedSize = alignUp(memObj->getSize(), 512);

        // The two directions use different kernels and swap the roles of argument 0 and 1.
        if (toNonAux) {
            builder.setKernel(convertToNonAuxKernel.at(kernelIndex).get());
            builder.setArg(0, memObj);
            builder.setArgSvm(1, alignedSize, reinterpret_cast<void *>(allocation->getGpuAddress()), nullptr, 0u);
        } else {
            UNRECOVERABLE_IF(AuxTranslationDirection::NonAuxToAux != operationParams.auxTranslationDirection);
            builder.setKernel(convertToAuxKernel.at(kernelIndex).get());
            builder.setArgSvm(0, alignedSize, reinterpret_cast<void *>(allocation->getGpuAddress()), nullptr, 0u);
            builder.setArg(1, memObj);
        }
        ++kernelIndex;

        // NOTE(review): presumably each work item handles 16 bytes - confirm against the kernel source.
        const size_t globalWorkSizeX = alignedSize / 16;
        builder.setDispatchGeometry(Vec3<size_t>{globalWorkSizeX, 0, 0}, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0});
        builder.bake(multiDispatchInfo);
    }
    return true;
}
void BuiltInOp<EBuiltInOps::AuxTranslation>::resizeKernelInstances(size_t size) const {
convertToNonAuxKernel.reserve(size);
convertToAuxKernel.reserve(size);

View File

@ -521,22 +521,6 @@ void CommandQueue::releaseIndirectHeap(IndirectHeap::Type heapType) {
getCommandStreamReceiver().releaseIndirectHeap(heapType);
}
// Appends the aux-translation builtin dispatches for `memObjsForAuxTranslation` to
// `multiDispatchInfo` and marks the dispatches around them as requiring a pipe control:
// the last dispatch already queued before the translation, and the last dispatch after it.
void CommandQueue::dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,
                                          AuxTranslationDirection auxTranslationDirection) {
    // Pipe control after whatever was queued ahead of the translation kernels.
    if (!multiDispatchInfo.empty()) {
        multiDispatchInfo.rbegin()->setPipeControlRequired(true);
    }

    auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getContext(), getDevice());

    BuiltinOpParams dispatchParams;
    dispatchParams.memObjsForAuxTranslation = &memObjsForAuxTranslation;
    dispatchParams.auxTranslationDirection = auxTranslationDirection;

    builder.buildDispatchInfos(multiDispatchInfo, dispatchParams);

    // The builder may add no dispatches (e.g. an empty memObjsForAuxTranslation), so the container
    // can still be empty here; dereferencing rbegin() in that state would be undefined behaviour.
    if (!multiDispatchInfo.empty()) {
        multiDispatchInfo.rbegin()->setPipeControlRequired(true);
    }
}
void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies) {
auto allocator = getCommandStreamReceiver().getTimestampPacketAllocator();

View File

@ -429,9 +429,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType){};
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,
AuxTranslationDirection auxTranslationDirection);
MOCKABLE_VIRTUAL void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies);
void processProperties(const cl_queue_properties *properties);
bool bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr,

View File

@ -382,6 +382,9 @@ class CommandQueueHw : public CommandQueue {
cl_int enqueueMarkerForReadWriteOperation(MemObj *memObj, void *ptr, cl_command_type commandType, cl_bool blocking, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event);
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,
AuxTranslationDirection auxTranslationDirection);
private:
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override;

View File

@ -5,6 +5,7 @@
*
*/
#include "runtime/built_ins/aux_translation_builtin.h"
#include "runtime/command_queue/enqueue_barrier.h"
#include "runtime/command_queue/enqueue_copy_buffer.h"
#include "runtime/command_queue/enqueue_copy_buffer_rect.h"
@ -99,4 +100,17 @@ cl_int CommandQueueHw<Family>::enqueueMarkerForReadWriteOperation(MemObj *memObj
return CL_SUCCESS;
}
// Fills `multiDispatchInfo` with aux-translation dispatches for the given memory objects and
// direction, using the Family-specific path of the AuxTranslation builtin builder.
template <typename Family>
void CommandQueueHw<Family>::dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,
                                                    AuxTranslationDirection auxTranslationDirection) {
    BuiltinOpParams translationParams;
    translationParams.memObjsForAuxTranslation = &memObjsForAuxTranslation;
    translationParams.auxTranslationDirection = auxTranslationDirection;

    auto builtIns = getDevice().getExecutionEnvironment()->getBuiltIns();
    auto &genericBuilder = builtIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getContext(), getDevice());

    // The templated entry point exists only on the concrete AuxTranslation builder type.
    auto &auxBuilder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(genericBuilder);
    auxBuilder.buildDispatchInfosForAuxTranslation<Family>(multiDispatchInfo, translationParams);
}
} // namespace NEO

View File

@ -196,14 +196,10 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
size_t expectedSizeCS = 0;
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
if (multiDispatchInfo.peekMainKernel() && multiDispatchInfo.peekMainKernel()->isAuxTranslationRequired()) {
expectedSizeCS += sizeof(PIPE_CONTROL);
}
for (auto &dispatchInfo : multiDispatchInfo) {
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel());
if (dispatchInfo.isPipeControlRequired()) {
expectedSizeCS += sizeof(PIPE_CONTROL);
}
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize();
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize();
}
if (parentKernel) {
SchedulerKernel &scheduler = commandQueue.getDevice().getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(parentKernel->getContext());

View File

@ -114,18 +114,11 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0);
if (mainKernel->isAuxTranslationRequired()) {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
auto pPipeControlCmd = static_cast<PIPE_CONTROL *>(commandStream->getSpace(sizeof(PIPE_CONTROL)));
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
pPipeControlCmd->setDcFlushEnable(true);
pPipeControlCmd->setCommandStreamerStallEnable(true);
}
dispatchProfilingPerfStartCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
size_t currentDispatchIndex = 0;
for (auto &dispatchInfo : multiDispatchInfo) {
dispatchInfo.dispatchInitCommands(*commandStream);
auto &kernel = *dispatchInfo.getKernel();
DEBUG_BREAK_IF(!(dispatchInfo.getDim() >= 1 && dispatchInfo.getDim() <= 3));
DEBUG_BREAK_IF(!(dispatchInfo.getGWS().z == 1 || dispatchInfo.getDim() == 3));
@ -203,14 +196,9 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
offsetInterfaceDescriptorTable, numberOfWorkgroups, startOfWorkgroups);
dispatchWorkarounds(commandStream, commandQueue, kernel, false);
if (dispatchInfo.isPipeControlRequired()) {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
auto pPipeControlCmd = static_cast<PIPE_CONTROL *>(commandStream->getSpace(sizeof(PIPE_CONTROL)));
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
pPipeControlCmd->setCommandStreamerStallEnable(true);
}
currentDispatchIndex++;
dispatchInfo.dispatchEpilogueCommands(*commandStream);
}
if (mainKernel->requiresCacheFlushCommand(commandQueue)) {
uint64_t postSyncAddress = 0;

View File

@ -73,6 +73,7 @@ set(RUNTIME_SRCS_HELPERS_BASE
${CMAKE_CURRENT_SOURCE_DIR}/properties_helper.cpp
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/queue_helpers.cpp
${CMAKE_CURRENT_SOURCE_DIR}/queue_helpers.h
${CMAKE_CURRENT_SOURCE_DIR}/registered_method_dispatcher.h
${CMAKE_CURRENT_SOURCE_DIR}/sampler_helpers.h
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address.h
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_base.inl

View File

@ -9,6 +9,7 @@
#include "core/helpers/vec.h"
#include "runtime/built_ins/builtins_dispatch_builder.h"
#include "runtime/helpers/registered_method_dispatcher.h"
#include "runtime/mem_obj/mem_obj.h"
#include "runtime/memory_manager/surface.h"
#include "runtime/utilities/stackvec.h"
@ -21,14 +22,15 @@ namespace NEO {
class Kernel;
class DispatchInfo {
public:
using DispatchCommandMethodT = void(LinearStream &commandStream);
DispatchInfo() = default;
DispatchInfo(Kernel *kernel, uint32_t dim, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset)
: kernel(kernel), dim(dim), gws(gws), elws(elws), offset(offset) {}
DispatchInfo(Kernel *kernel, uint32_t dim, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset, Vec3<size_t> agws, Vec3<size_t> lws, Vec3<size_t> twgs, Vec3<size_t> nwgs, Vec3<size_t> swgs)
: kernel(kernel), dim(dim), gws(gws), elws(elws), offset(offset), agws(agws), lws(lws), twgs(twgs), nwgs(nwgs), swgs(swgs) {}
bool isPipeControlRequired() const { return pipeControlRequired; }
void setPipeControlRequired(bool blocking) { this->pipeControlRequired = blocking; }
bool usesSlm() const;
bool usesStatelessPrintfSurface() const;
uint32_t getRequiredScratchSize() const;
@ -56,8 +58,10 @@ class DispatchInfo {
bool peekCanBePartitioned() const { return canBePartitioned; }
void setCanBePartitioned(bool canBePartitioned) { this->canBePartitioned = canBePartitioned; }
RegisteredMethodDispatcher<DispatchCommandMethodT> dispatchInitCommands;
RegisteredMethodDispatcher<DispatchCommandMethodT> dispatchEpilogueCommands;
protected:
bool pipeControlRequired = false;
bool canBePartitioned = false;
Kernel *kernel = nullptr;
uint32_t dim = 0;

View File

@ -279,6 +279,9 @@ class DispatchInfoBuilder {
}
}
// Gives mutable access to the dispatch info stored at `index`; the index is not validated here.
DispatchInfo &getDispatchInfo(size_t index) { return dispatchInfos[index]; }
// Returns numDispatches, the compile-time dispatch count of this builder instantiation.
static constexpr size_t getMaxNumDispatches() { return numDispatches; }
protected:
static bool supportsSplit() {
return (Mode == SplitDispatch::SplitMode::WalkerSplit);
@ -420,10 +423,6 @@ class DispatchInfoBuilder {
return static_cast<uint32_t>(x);
}
static constexpr size_t getMaxNumDispatches() {
return numDispatches;
}
// Number of dispatches held by this builder: 1 in WalkerSplit mode, otherwise
// powConst(Mode + 1, Dim + 1) (presumably (Mode + 1) raised to the power (Dim + 1) - confirm against powConst).
static const size_t numDispatches = (Mode == SplitDispatch::SplitMode::WalkerSplit) ? 1 : powConst((static_cast<uint32_t>(Mode) + 1), // 1 (middle) 2 (middle + right/bottom) or 3 (left/top + middle + right/bottom)
(static_cast<uint32_t>(Dim) + 1)); // 1, 2 or 3

View File

@ -0,0 +1,47 @@
/*
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstddef>
#include <cstdint>
#include <functional>
#include <utility>
namespace NEO {
// Holds an optional callback together with an optional size-estimation callback.
//
// ArgsT is the call signature of the callback (e.g. void(LinearStream &)), not an argument list.
// Both callbacks start out empty: invoking an empty dispatcher does nothing and its estimated
// size is 0.
template <typename ArgsT>
class RegisteredMethodDispatcher {
  public:
    using CommandsSizeEstimationMethodT = std::function<size_t(void)>;
    using RegisteredMethodT = std::function<ArgsT>;

    // Stores `method` as the callback run by operator(), replacing any previous one.
    void registerMethod(RegisteredMethodT method) {
        // The parameter is already a private copy; move it instead of copying the functor again.
        this->method = std::move(method);
    }

    // Stores `method` as the callback used by estimateCommandsSize(), replacing any previous one.
    void registerCommandsSizeEstimationMethod(CommandsSizeEstimationMethodT method) {
        this->commandsEstimationMethod = std::move(method);
    }

    // Forwards `args` to the registered callback; does nothing when none is registered.
    // Any value returned by the callback is discarded.
    template <typename... Args>
    void operator()(Args &&... args) const {
        if (method) {
            method(std::forward<Args>(args)...);
        }
    }

    // Returns the result of the registered size-estimation callback, or 0 when none is registered.
    size_t estimateCommandsSize() const {
        if (commandsEstimationMethod) {
            return commandsEstimationMethod();
        }
        return 0;
    }

  protected:
    CommandsSizeEstimationMethodT commandsEstimationMethod;
    RegisteredMethodT method;
};
} // namespace NEO

View File

@ -246,8 +246,9 @@ TEST_F(BuiltInTests, BuiltinDispatchInfoBuilderCopyBufferToBuffer) {
delete dstPtr;
}
TEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) {
BuiltinDispatchInfoBuilder &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);
HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) {
BuiltinDispatchInfoBuilder &baseBuilder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);
auto &builder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(baseBuilder);
MemObjsForAuxTranslation memObjsForAuxTranslation;
MultiDispatchInfo multiDispatchInfo;
@ -265,7 +266,7 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTransla
memObjsForAuxTranslation.insert(&buffer);
}
EXPECT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams));
EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams));
EXPECT_EQ(3u, multiDispatchInfo.size());
for (auto &dispatchInfo : multiDispatchInfo) {
@ -294,8 +295,9 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTransla
EXPECT_NE(builtinKernels[1], builtinKernels[2]);
}
TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) {
BuiltinDispatchInfoBuilder &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);
HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) {
BuiltinDispatchInfoBuilder &baseBuilder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);
auto &builder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(baseBuilder);
MemObjsForAuxTranslation memObjsForAuxTranslation;
MultiDispatchInfo multiDispatchInfo;
@ -313,7 +315,7 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslatio
memObjsForAuxTranslation.insert(&buffer);
}
EXPECT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams));
EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams));
EXPECT_EQ(3u, multiDispatchInfo.size());
for (auto &dispatchInfo : multiDispatchInfo) {
@ -342,8 +344,9 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslatio
EXPECT_NE(builtinKernels[1], builtinKernels[2]);
}
TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickDifferentKernelsDependingOnRequest) {
BuiltinDispatchInfoBuilder &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);
HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickDifferentKernelsDependingOnRequest) {
BuiltinDispatchInfoBuilder &baseBuilder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);
auto &builder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(baseBuilder);
MemObjsForAuxTranslation memObjsForAuxTranslation;
MockBuffer mockBuffer[3];
@ -358,10 +361,10 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickD
}
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux;
EXPECT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams));
EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams));
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux;
EXPECT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams));
EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams));
EXPECT_EQ(6u, multiDispatchInfo.size());
@ -376,8 +379,9 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickD
EXPECT_NE(builtinKernels[2], builtinKernels[5]);
}
TEST_F(BuiltInTests, givenInvalidAuxTranslationDirectionWhenBuildingDispatchInfosThenAbort) {
BuiltinDispatchInfoBuilder &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);
HWTEST_F(BuiltInTests, givenInvalidAuxTranslationDirectionWhenBuildingDispatchInfosThenAbort) {
BuiltinDispatchInfoBuilder &baseBuilder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);
auto &builder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(baseBuilder);
MemObjsForAuxTranslation memObjsForAuxTranslation;
MockBuffer mockBuffer;
@ -389,7 +393,7 @@ TEST_F(BuiltInTests, givenInvalidAuxTranslationDirectionWhenBuildingDispatchInfo
memObjsForAuxTranslation.insert(&mockBuffer);
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::None;
EXPECT_THROW(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams), std::exception);
EXPECT_THROW(builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams), std::exception);
}
class MockAuxBuilInOp : public BuiltInOp<EBuiltInOps::AuxTranslation> {
@ -411,7 +415,7 @@ TEST_F(BuiltInTests, whenAuxBuiltInIsConstructedThenResizeKernelInstancedTo5) {
EXPECT_EQ(5u, mockAuxBuiltInOp.convertToNonAuxKernel.size());
}
TEST_F(BuiltInTests, givenMoreBuffersForAuxTranslationThanKernelInstancesWhenDispatchingThenResize) {
HWTEST_F(BuiltInTests, givenMoreBuffersForAuxTranslationThanKernelInstancesWhenDispatchingThenResize) {
MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pContext, *pDevice);
EXPECT_EQ(5u, mockAuxBuiltInOp.convertToAuxKernel.size());
EXPECT_EQ(5u, mockAuxBuiltInOp.convertToNonAuxKernel.size());
@ -428,7 +432,7 @@ TEST_F(BuiltInTests, givenMoreBuffersForAuxTranslationThanKernelInstancesWhenDis
memObjsForAuxTranslation.insert(&buffer);
}
EXPECT_TRUE(mockAuxBuiltInOp.buildDispatchInfos(multiDispatchInfo, builtinOpsParams));
EXPECT_TRUE(mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams));
EXPECT_EQ(7u, mockAuxBuiltInOp.convertToAuxKernel.size());
EXPECT_EQ(7u, mockAuxBuiltInOp.convertToNonAuxKernel.size());
}

View File

@ -5,6 +5,7 @@
*
*/
#include "runtime/built_ins/aux_translation_builtin.h"
#include "runtime/command_queue/gpgpu_walker.h"
#include "runtime/command_queue/hardware_interface.h"
#include "runtime/event/perf_counter.h"
@ -18,6 +19,7 @@
#include "unit_tests/fixtures/device_fixture.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/helpers/hw_parse.h"
#include "unit_tests/mocks/mock_buffer.h"
#include "unit_tests/mocks/mock_command_queue.h"
#include "unit_tests/mocks/mock_graphics_allocation.h"
#include "unit_tests/mocks/mock_kernel.h"
@ -1266,7 +1268,13 @@ TEST(DispatchWalker, calculateDispatchDim) {
}
}
HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxTranslationRequiredThenPipeControlWithStallAndDCFlushAdded) {
HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxToNonAuxWhenTranslationRequiredThenPipeControlWithStallAndDCFlushAdded) {
MockContext context;
auto executionEnvironment = pDevice->getExecutionEnvironment();
auto builtIns = executionEnvironment->getBuiltIns();
BuiltinDispatchInfoBuilder &baseBuilder = builtIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, context, *pDevice);
auto &builder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(baseBuilder);
MockKernel kernel(program.get(), kernelInfo, *pDevice);
kernelInfo.workloadInfo.workDimOffset = 0;
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
@ -1274,11 +1282,18 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxTranslationRequiredThenPipeContro
auto &cmdStream = pCmdQ->getCS(0);
void *buffer = cmdStream.getCpuBase();
kernel.auxTranslationRequired = true;
MockBuffer mockBuffer[2];
MockMultiDispatchInfo multiDispatchInfo(&kernel);
DispatchInfo di1(&kernel, 1, Vec3<size_t>(1, 1, 1), Vec3<size_t>(1, 1, 1), Vec3<size_t>(0, 0, 0));
di1.setPipeControlRequired(true);
multiDispatchInfo.push(di1);
MultiDispatchInfo multiDispatchInfo;
MemObjsForAuxTranslation memObjsForAuxTranslation;
memObjsForAuxTranslation.insert(&mockBuffer[0]);
memObjsForAuxTranslation.insert(&mockBuffer[1]);
BuiltinOpParams builtinOpsParams;
builtinOpsParams.memObjsForAuxTranslation = &memObjsForAuxTranslation;
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux;
builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams);
HardwareInterface<FamilyType>::dispatchWalker(
*pCmdQ,
@ -1305,7 +1320,66 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxTranslationRequiredThenPipeContro
EXPECT_TRUE(beginPipeControl->getCommandStreamerStallEnable());
auto endPipeControl = genCmdCast<typename FamilyType::PIPE_CONTROL *>(*(pipeControls[1]));
EXPECT_FALSE(endPipeControl->getDcFlushEnable());
bool dcFlushRequired = (executionEnvironment->getHardwareInfo()->platform.eRenderCoreFamily == IGFX_GEN8_CORE);
EXPECT_EQ(dcFlushRequired, endPipeControl->getDcFlushEnable());
EXPECT_TRUE(endPipeControl->getCommandStreamerStallEnable());
}
// Builds NonAuxToAux translation dispatches for two buffers, runs dispatchWalker over them and
// checks that the parsed command stream holds exactly two PIPE_CONTROLs, each with the
// command-streamer stall enabled. DC flush is expected on them only when the render core family
// is IGFX_GEN8_CORE.
HWTEST_F(DispatchWalkerTest, givenKernelWhenNonAuxToAuxWhenTranslationRequiredThenPipeControlWithStallAdded) {
MockContext context;
auto executionEnvironment = pDevice->getExecutionEnvironment();
auto builtIns = executionEnvironment->getBuiltIns();
BuiltinDispatchInfoBuilder &baseBuilder = builtIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, context, *pDevice);
// The templated build method is only available on the concrete AuxTranslation builder type.
auto &builder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(baseBuilder);
// NOTE(review): `kernel` is initialized and flagged below but is not added to multiDispatchInfo
// in this test - confirm whether this setup is still required.
MockKernel kernel(program.get(), kernelInfo, *pDevice);
kernelInfo.workloadInfo.workDimOffset = 0;
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
// Capture the stream's CPU base before dispatching so the emitted commands can be parsed later.
auto &cmdStream = pCmdQ->getCS(0);
void *buffer = cmdStream.getCpuBase();
kernel.auxTranslationRequired = true;
// Two buffers to translate, so the builder produces a first and a last dispatch.
MockBuffer mockBuffer[2];
MultiDispatchInfo multiDispatchInfo;
MemObjsForAuxTranslation memObjsForAuxTranslation;
memObjsForAuxTranslation.insert(&mockBuffer[0]);
memObjsForAuxTranslation.insert(&mockBuffer[1]);
BuiltinOpParams builtinOpsParams;
builtinOpsParams.memObjsForAuxTranslation = &memObjsForAuxTranslation;
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux;
builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams);
HardwareInterface<FamilyType>::dispatchWalker(
*pCmdQ,
multiDispatchInfo,
CsrDependencies(),
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
pDevice->getPreemptionMode(),
false);
// Parse everything written to the stream and collect the PIPE_CONTROL commands.
auto sizeUsed = cmdStream.getUsed();
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, buffer, sizeUsed));
auto pipeControls = findAll<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(2u, pipeControls.size());
// In this direction both pipe controls share the same DC-flush expectation.
bool dcFlushRequired = (executionEnvironment->getHardwareInfo()->platform.eRenderCoreFamily == IGFX_GEN8_CORE);
auto beginPipeControl = genCmdCast<typename FamilyType::PIPE_CONTROL *>(*(pipeControls[0]));
EXPECT_EQ(dcFlushRequired, beginPipeControl->getDcFlushEnable());
EXPECT_TRUE(beginPipeControl->getCommandStreamerStallEnable());
auto endPipeControl = genCmdCast<typename FamilyType::PIPE_CONTROL *>(*(pipeControls[1]));
EXPECT_EQ(dcFlushRequired, endPipeControl->getDcFlushEnable());
EXPECT_TRUE(endPipeControl->getCommandStreamerStallEnable());
}

View File

@ -655,6 +655,7 @@ struct EnqueueAuxKernelTests : public EnqueueKernelTest {
template <typename FamilyType>
class MyCmdQ : public CommandQueueHw<FamilyType> {
public:
using CommandQueueHw<FamilyType>::commandStream;
MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,
AuxTranslationDirection auxTranslationDirection) override {
@ -733,14 +734,19 @@ HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThe
EXPECT_EQ(&buffer2, *std::get<MemObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(0)).begin());
EXPECT_EQ(&buffer2, *std::get<MemObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(1)).begin());
uint32_t pipeControlCount = 0;
for (auto dispatchInfo : cmdQ.dispatchInfos) {
if (dispatchInfo.isPipeControlRequired()) {
++pipeControlCount;
}
}
EXPECT_EQ(4u, pipeControlCount);
auto cmdStream = cmdQ.commandStream;
auto sizeUsed = cmdStream->getUsed();
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), sizeUsed));
auto pipeControls = findAll<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
auto additionalPcCount = PipeControlHelper<FamilyType>::getSizeForPipeControlWithPostSyncOperation() / sizeof(typename FamilyType::PIPE_CONTROL);
// |AuxToNonAux|NDR|NonAuxToAux|
ASSERT_EQ(4u + additionalPcCount, pipeControls.size());
ASSERT_EQ(2u, cmdQ.auxTranslationDirections.size());
EXPECT_EQ(AuxTranslationDirection::AuxToNonAux, cmdQ.auxTranslationDirections[0]);
EXPECT_EQ(AuxTranslationDirection::NonAuxToAux, cmdQ.auxTranslationDirections[1]);