mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Introduce RegisteredMethodDispatcher.
- Inject dispatch methods per DispatchInfo - Each DispatchInfo in MultiDispatchInfo can have different behaviour - Implement AuxTranslation programming with new approach Change-Id: Ie28de0c72a77b8e91509a5b9b8740d72fedf4ad6 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:

committed by
sys_ocldev

parent
a72109d209
commit
ddb0d82e83
@ -9,6 +9,7 @@
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
#include "runtime/built_ins/builtins_dispatch_builder.h"
|
||||
#include "runtime/helpers/dispatch_info_builder.h"
|
||||
#include "runtime/helpers/hw_helper.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
@ -17,9 +18,57 @@ template <>
|
||||
class BuiltInOp<EBuiltInOps::AuxTranslation> : public BuiltinDispatchInfoBuilder {
|
||||
public:
|
||||
BuiltInOp(BuiltIns &kernelsLib, Context &context, Device &device);
|
||||
bool buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const override;
|
||||
template <typename GfxFamily>
|
||||
bool buildDispatchInfosForAuxTranslation(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const {
|
||||
size_t kernelInstanceNumber = 0;
|
||||
size_t numMemObjectsToTranslate = operationParams.memObjsForAuxTranslation->size();
|
||||
resizeKernelInstances(numMemObjectsToTranslate);
|
||||
multiDispatchInfo.setBuiltinOpParams(operationParams);
|
||||
|
||||
for (auto &memObj : *operationParams.memObjsForAuxTranslation) {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d1D, SplitDispatch::SplitMode::NoSplit> builder;
|
||||
auto graphicsAllocation = memObj->getGraphicsAllocation();
|
||||
size_t allocationSize = alignUp(memObj->getSize(), 512);
|
||||
|
||||
UNRECOVERABLE_IF(builder.getMaxNumDispatches() != 1);
|
||||
|
||||
if (kernelInstanceNumber == 0) {
|
||||
// Before Kernel
|
||||
bool dcFlush = (AuxTranslationDirection::AuxToNonAux == operationParams.auxTranslationDirection);
|
||||
registerPipeControlProgramming<GfxFamily>(builder.getDispatchInfo(0).dispatchInitCommands, dcFlush);
|
||||
}
|
||||
if (kernelInstanceNumber == numMemObjectsToTranslate - 1) {
|
||||
// After Kernel
|
||||
registerPipeControlProgramming<GfxFamily>(builder.getDispatchInfo(0).dispatchEpilogueCommands, false);
|
||||
}
|
||||
|
||||
if (AuxTranslationDirection::AuxToNonAux == operationParams.auxTranslationDirection) {
|
||||
builder.setKernel(convertToNonAuxKernel[kernelInstanceNumber++].get());
|
||||
builder.setArg(0, memObj);
|
||||
builder.setArgSvm(1, allocationSize, reinterpret_cast<void *>(graphicsAllocation->getGpuAddress()), nullptr, 0u);
|
||||
} else {
|
||||
UNRECOVERABLE_IF(AuxTranslationDirection::NonAuxToAux != operationParams.auxTranslationDirection);
|
||||
builder.setKernel(convertToAuxKernel[kernelInstanceNumber++].get());
|
||||
builder.setArgSvm(0, allocationSize, reinterpret_cast<void *>(graphicsAllocation->getGpuAddress()), nullptr, 0u);
|
||||
builder.setArg(1, memObj);
|
||||
}
|
||||
|
||||
size_t xGws = allocationSize / 16;
|
||||
|
||||
builder.setDispatchGeometry(Vec3<size_t>{xGws, 0, 0}, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0});
|
||||
builder.bake(multiDispatchInfo);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
protected:
|
||||
template <typename GfxFamily>
|
||||
void registerPipeControlProgramming(RegisteredMethodDispatcher<DispatchInfo::DispatchCommandMethodT> &dispatcher, bool dcFlush) const {
|
||||
auto method = std::bind(PipeControlHelper<GfxFamily>::addPipeControl, std::placeholders::_1, dcFlush);
|
||||
dispatcher.registerMethod(method);
|
||||
dispatcher.registerCommandsSizeEstimationMethod(PipeControlHelper<GfxFamily>::getSizeForSinglePipeControl);
|
||||
}
|
||||
void resizeKernelInstances(size_t size) const;
|
||||
Kernel *baseKernel = nullptr;
|
||||
mutable std::vector<std::unique_ptr<Kernel>> convertToNonAuxKernel;
|
||||
|
@ -23,35 +23,6 @@ BuiltInOp<EBuiltInOps::AuxTranslation>::BuiltInOp(BuiltIns &kernelsLib, Context
|
||||
resizeKernelInstances(5);
|
||||
}
|
||||
|
||||
// Builds one DispatchInfo (1D, NoSplit builder) per entry of
// operationParams.memObjsForAuxTranslation and bakes each of them into multiDispatchInfo.
// The kernel and the argument order depend on operationParams.auxTranslationDirection;
// a direction other than AuxToNonAux / NonAuxToAux trips UNRECOVERABLE_IF.
// Always returns true.
bool BuiltInOp<EBuiltInOps::AuxTranslation>::buildDispatchInfos(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const {
    using AuxDispatchBuilder = DispatchInfoBuilder<SplitDispatch::Dim::d1D, SplitDispatch::SplitMode::NoSplit>;

    // Make sure there is one kernel instance available per mem object.
    resizeKernelInstances(operationParams.memObjsForAuxTranslation->size());
    multiDispatchInfo.setBuiltinOpParams(operationParams);

    size_t instanceIndex = 0;
    for (auto &memObj : *operationParams.memObjsForAuxTranslation) {
        AuxDispatchBuilder builder;
        auto allocation = memObj->getGraphicsAllocation();
        size_t alignedSize = alignUp(memObj->getSize(), 512);

        const bool toNonAux = (AuxTranslationDirection::AuxToNonAux == operationParams.auxTranslationDirection);
        if (toNonAux) {
            builder.setKernel(convertToNonAuxKernel.at(instanceIndex).get());
            builder.setArg(0, memObj);
            builder.setArgSvm(1, alignedSize, reinterpret_cast<void *>(allocation->getGpuAddress()), nullptr, 0u);
        } else {
            UNRECOVERABLE_IF(AuxTranslationDirection::NonAuxToAux != operationParams.auxTranslationDirection);
            builder.setKernel(convertToAuxKernel.at(instanceIndex).get());
            builder.setArgSvm(0, alignedSize, reinterpret_cast<void *>(allocation->getGpuAddress()), nullptr, 0u);
            builder.setArg(1, memObj);
        }
        ++instanceIndex;

        // Global work size in X is the aligned size divided by 16.
        const size_t globalWorkSizeX = alignedSize / 16;
        builder.setDispatchGeometry(Vec3<size_t>{globalWorkSizeX, 0, 0}, Vec3<size_t>{0, 0, 0}, Vec3<size_t>{0, 0, 0});
        builder.bake(multiDispatchInfo);
    }

    return true;
}
|
||||
|
||||
void BuiltInOp<EBuiltInOps::AuxTranslation>::resizeKernelInstances(size_t size) const {
|
||||
convertToNonAuxKernel.reserve(size);
|
||||
convertToAuxKernel.reserve(size);
|
||||
|
@ -521,22 +521,6 @@ void CommandQueue::releaseIndirectHeap(IndirectHeap::Type heapType) {
|
||||
getCommandStreamReceiver().releaseIndirectHeap(heapType);
|
||||
}
|
||||
|
||||
// Appends aux-translation dispatches for memObjsForAuxTranslation to multiDispatchInfo
// using the EBuiltInOps::AuxTranslation builder.
//
// The DispatchInfo that is last before the builder runs (if any) and the DispatchInfo
// that is last after it runs (if any) are both flagged with setPipeControlRequired(true).
void CommandQueue::dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,
                                          AuxTranslationDirection auxTranslationDirection) {
    if (!multiDispatchInfo.empty()) {
        multiDispatchInfo.rbegin()->setPipeControlRequired(true);
    }
    auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getContext(), getDevice());
    BuiltinOpParams dispatchParams;

    dispatchParams.memObjsForAuxTranslation = &memObjsForAuxTranslation;
    dispatchParams.auxTranslationDirection = auxTranslationDirection;

    builder.buildDispatchInfos(multiDispatchInfo, dispatchParams);

    // Same guard as above: if nothing was queued before and the builder added nothing,
    // rbegin() would refer to no element and must not be dereferenced.
    if (!multiDispatchInfo.empty()) {
        multiDispatchInfo.rbegin()->setPipeControlRequired(true);
    }
}
|
||||
|
||||
void CommandQueue::obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies) {
|
||||
auto allocator = getCommandStreamReceiver().getTimestampPacketAllocator();
|
||||
|
||||
|
@ -429,9 +429,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
|
||||
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType){};
|
||||
|
||||
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,
|
||||
AuxTranslationDirection auxTranslationDirection);
|
||||
|
||||
MOCKABLE_VIRTUAL void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies);
|
||||
void processProperties(const cl_queue_properties *properties);
|
||||
bool bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandType, cl_bool blocking, size_t size, void *ptr,
|
||||
|
@ -382,6 +382,9 @@ class CommandQueueHw : public CommandQueue {
|
||||
cl_int enqueueMarkerForReadWriteOperation(MemObj *memObj, void *ptr, cl_command_type commandType, cl_bool blocking, cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList, cl_event *event);
|
||||
|
||||
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,
|
||||
AuxTranslationDirection auxTranslationDirection);
|
||||
|
||||
private:
|
||||
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
|
||||
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override;
|
||||
|
@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/built_ins/aux_translation_builtin.h"
|
||||
#include "runtime/command_queue/enqueue_barrier.h"
|
||||
#include "runtime/command_queue/enqueue_copy_buffer.h"
|
||||
#include "runtime/command_queue/enqueue_copy_buffer_rect.h"
|
||||
@ -99,4 +100,17 @@ cl_int CommandQueueHw<Family>::enqueueMarkerForReadWriteOperation(MemObj *memObj
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void CommandQueueHw<Family>::dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,
|
||||
AuxTranslationDirection auxTranslationDirection) {
|
||||
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getContext(), getDevice());
|
||||
auto &auxTranslationBuilder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(builder);
|
||||
BuiltinOpParams dispatchParams;
|
||||
|
||||
dispatchParams.memObjsForAuxTranslation = &memObjsForAuxTranslation;
|
||||
dispatchParams.auxTranslationDirection = auxTranslationDirection;
|
||||
|
||||
auxTranslationBuilder.buildDispatchInfosForAuxTranslation<Family>(multiDispatchInfo, dispatchParams);
|
||||
}
|
||||
} // namespace NEO
|
||||
|
@ -196,14 +196,10 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
|
||||
|
||||
size_t expectedSizeCS = 0;
|
||||
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
|
||||
if (multiDispatchInfo.peekMainKernel() && multiDispatchInfo.peekMainKernel()->isAuxTranslationRequired()) {
|
||||
expectedSizeCS += sizeof(PIPE_CONTROL);
|
||||
}
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel());
|
||||
if (dispatchInfo.isPipeControlRequired()) {
|
||||
expectedSizeCS += sizeof(PIPE_CONTROL);
|
||||
}
|
||||
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize();
|
||||
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize();
|
||||
}
|
||||
if (parentKernel) {
|
||||
SchedulerKernel &scheduler = commandQueue.getDevice().getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(parentKernel->getContext());
|
||||
|
@ -114,18 +114,11 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
|
||||
DEBUG_BREAK_IF(offsetInterfaceDescriptorTable % 64 != 0);
|
||||
|
||||
if (mainKernel->isAuxTranslationRequired()) {
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
auto pPipeControlCmd = static_cast<PIPE_CONTROL *>(commandStream->getSpace(sizeof(PIPE_CONTROL)));
|
||||
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
|
||||
pPipeControlCmd->setDcFlushEnable(true);
|
||||
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||
}
|
||||
|
||||
dispatchProfilingPerfStartCommands(hwTimeStamps, hwPerfCounter, commandStream, commandQueue);
|
||||
|
||||
size_t currentDispatchIndex = 0;
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
dispatchInfo.dispatchInitCommands(*commandStream);
|
||||
auto &kernel = *dispatchInfo.getKernel();
|
||||
DEBUG_BREAK_IF(!(dispatchInfo.getDim() >= 1 && dispatchInfo.getDim() <= 3));
|
||||
DEBUG_BREAK_IF(!(dispatchInfo.getGWS().z == 1 || dispatchInfo.getDim() == 3));
|
||||
@ -203,14 +196,9 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
offsetInterfaceDescriptorTable, numberOfWorkgroups, startOfWorkgroups);
|
||||
|
||||
dispatchWorkarounds(commandStream, commandQueue, kernel, false);
|
||||
if (dispatchInfo.isPipeControlRequired()) {
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
auto pPipeControlCmd = static_cast<PIPE_CONTROL *>(commandStream->getSpace(sizeof(PIPE_CONTROL)));
|
||||
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
|
||||
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||
}
|
||||
|
||||
currentDispatchIndex++;
|
||||
dispatchInfo.dispatchEpilogueCommands(*commandStream);
|
||||
}
|
||||
if (mainKernel->requiresCacheFlushCommand(commandQueue)) {
|
||||
uint64_t postSyncAddress = 0;
|
||||
|
@ -73,6 +73,7 @@ set(RUNTIME_SRCS_HELPERS_BASE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/properties_helper.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/queue_helpers.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/queue_helpers.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/registered_method_dispatcher.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sampler_helpers.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/state_base_address_base.inl
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include "core/helpers/vec.h"
|
||||
#include "runtime/built_ins/builtins_dispatch_builder.h"
|
||||
#include "runtime/helpers/registered_method_dispatcher.h"
|
||||
#include "runtime/mem_obj/mem_obj.h"
|
||||
#include "runtime/memory_manager/surface.h"
|
||||
#include "runtime/utilities/stackvec.h"
|
||||
@ -21,14 +22,15 @@ namespace NEO {
|
||||
class Kernel;
|
||||
|
||||
class DispatchInfo {
|
||||
|
||||
public:
|
||||
using DispatchCommandMethodT = void(LinearStream &commandStream);
|
||||
|
||||
DispatchInfo() = default;
|
||||
DispatchInfo(Kernel *kernel, uint32_t dim, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset)
|
||||
: kernel(kernel), dim(dim), gws(gws), elws(elws), offset(offset) {}
|
||||
DispatchInfo(Kernel *kernel, uint32_t dim, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset, Vec3<size_t> agws, Vec3<size_t> lws, Vec3<size_t> twgs, Vec3<size_t> nwgs, Vec3<size_t> swgs)
|
||||
: kernel(kernel), dim(dim), gws(gws), elws(elws), offset(offset), agws(agws), lws(lws), twgs(twgs), nwgs(nwgs), swgs(swgs) {}
|
||||
bool isPipeControlRequired() const { return pipeControlRequired; }
|
||||
void setPipeControlRequired(bool blocking) { this->pipeControlRequired = blocking; }
|
||||
bool usesSlm() const;
|
||||
bool usesStatelessPrintfSurface() const;
|
||||
uint32_t getRequiredScratchSize() const;
|
||||
@ -56,8 +58,10 @@ class DispatchInfo {
|
||||
bool peekCanBePartitioned() const { return canBePartitioned; }
|
||||
void setCanBePartitioned(bool canBePartitioned) { this->canBePartitioned = canBePartitioned; }
|
||||
|
||||
RegisteredMethodDispatcher<DispatchCommandMethodT> dispatchInitCommands;
|
||||
RegisteredMethodDispatcher<DispatchCommandMethodT> dispatchEpilogueCommands;
|
||||
|
||||
protected:
|
||||
bool pipeControlRequired = false;
|
||||
bool canBePartitioned = false;
|
||||
Kernel *kernel = nullptr;
|
||||
uint32_t dim = 0;
|
||||
|
@ -279,6 +279,9 @@ class DispatchInfoBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a mutable reference to the element of dispatchInfos at `index`.
// The element is fetched with operator[]; no range check is performed here.
DispatchInfo &getDispatchInfo(size_t index) { return dispatchInfos[index]; }
|
||||
// Compile-time accessor returning numDispatches.
static constexpr size_t getMaxNumDispatches() { return numDispatches; }
|
||||
|
||||
protected:
|
||||
static bool supportsSplit() {
|
||||
return (Mode == SplitDispatch::SplitMode::WalkerSplit);
|
||||
@ -420,10 +423,6 @@ class DispatchInfoBuilder {
|
||||
return static_cast<uint32_t>(x);
|
||||
}
|
||||
|
||||
// Compile-time accessor returning numDispatches.
static constexpr size_t getMaxNumDispatches() {
|
||||
return numDispatches;
|
||||
}
|
||||
|
||||
static const size_t numDispatches = (Mode == SplitDispatch::SplitMode::WalkerSplit) ? 1 : powConst((static_cast<uint32_t>(Mode) + 1), // 1 (middle) 2 (middle + right/bottom) or 3 (left/top + middle + right/bottom)
|
||||
(static_cast<uint32_t>(Dim) + 1)); // 1, 2 or 3
|
||||
|
||||
|
47
runtime/helpers/registered_method_dispatcher.h
Normal file
47
runtime/helpers/registered_method_dispatcher.h
Normal file
@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
|
||||
namespace NEO {
|
||||
// Holds two optional callables:
// - a "method" with signature ArgsT, invoked through operator();
// - a size-estimation callable returning size_t, invoked through estimateCommandsSize().
// Both start out empty. Invoking an empty method is a no-op, and an empty estimator
// reports 0.
template <typename ArgsT>
class RegisteredMethodDispatcher {
  public:
    using CommandsSizeEstimationMethodT = std::function<size_t(void)>;
    using RegisteredMethodT = std::function<ArgsT>;

    // Stores `method`, replacing any previously registered one.
    // The by-value parameter is a sink: it is moved into place rather than copied again.
    void registerMethod(RegisteredMethodT method) {
        this->method = std::move(method);
    }

    // Stores the size-estimation callable, replacing any previously registered one.
    void registerCommandsSizeEstimationMethod(CommandsSizeEstimationMethodT method) {
        this->commandsEstimationMethod = std::move(method);
    }

    // Forwards `args` to the registered method; does nothing when none is registered.
    template <typename... Args>
    void operator()(Args &&... args) const {
        if (method) {
            method(std::forward<Args>(args)...);
        }
    }

    // Returns the registered estimator's result, or 0 when no estimator is registered.
    size_t estimateCommandsSize() const {
        if (commandsEstimationMethod) {
            return commandsEstimationMethod();
        }
        return 0;
    }

  protected:
    CommandsSizeEstimationMethodT commandsEstimationMethod;
    RegisteredMethodT method;
};
|
||||
|
||||
} // namespace NEO
|
@ -246,8 +246,9 @@ TEST_F(BuiltInTests, BuiltinDispatchInfoBuilderCopyBufferToBuffer) {
|
||||
delete dstPtr;
|
||||
}
|
||||
|
||||
TEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) {
|
||||
BuiltinDispatchInfoBuilder &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);
|
||||
HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) {
|
||||
BuiltinDispatchInfoBuilder &baseBuilder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);
|
||||
auto &builder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(baseBuilder);
|
||||
|
||||
MemObjsForAuxTranslation memObjsForAuxTranslation;
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
@ -265,7 +266,7 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTransla
|
||||
memObjsForAuxTranslation.insert(&buffer);
|
||||
}
|
||||
|
||||
EXPECT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams));
|
||||
EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams));
|
||||
EXPECT_EQ(3u, multiDispatchInfo.size());
|
||||
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
@ -294,8 +295,9 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTransla
|
||||
EXPECT_NE(builtinKernels[1], builtinKernels[2]);
|
||||
}
|
||||
|
||||
TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) {
|
||||
BuiltinDispatchInfoBuilder &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);
|
||||
HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslationThenPickAndSetupCorrectKernels) {
|
||||
BuiltinDispatchInfoBuilder &baseBuilder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);
|
||||
auto &builder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(baseBuilder);
|
||||
|
||||
MemObjsForAuxTranslation memObjsForAuxTranslation;
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
@ -313,7 +315,7 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslatio
|
||||
memObjsForAuxTranslation.insert(&buffer);
|
||||
}
|
||||
|
||||
EXPECT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams));
|
||||
EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams));
|
||||
EXPECT_EQ(3u, multiDispatchInfo.size());
|
||||
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
@ -342,8 +344,9 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslatio
|
||||
EXPECT_NE(builtinKernels[1], builtinKernels[2]);
|
||||
}
|
||||
|
||||
TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickDifferentKernelsDependingOnRequest) {
|
||||
BuiltinDispatchInfoBuilder &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);
|
||||
HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickDifferentKernelsDependingOnRequest) {
|
||||
BuiltinDispatchInfoBuilder &baseBuilder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);
|
||||
auto &builder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(baseBuilder);
|
||||
|
||||
MemObjsForAuxTranslation memObjsForAuxTranslation;
|
||||
MockBuffer mockBuffer[3];
|
||||
@ -358,10 +361,10 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickD
|
||||
}
|
||||
|
||||
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux;
|
||||
EXPECT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams));
|
||||
EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams));
|
||||
|
||||
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux;
|
||||
EXPECT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams));
|
||||
EXPECT_TRUE(builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams));
|
||||
|
||||
EXPECT_EQ(6u, multiDispatchInfo.size());
|
||||
|
||||
@ -376,8 +379,9 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickD
|
||||
EXPECT_NE(builtinKernels[2], builtinKernels[5]);
|
||||
}
|
||||
|
||||
TEST_F(BuiltInTests, givenInvalidAuxTranslationDirectionWhenBuildingDispatchInfosThenAbort) {
|
||||
BuiltinDispatchInfoBuilder &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);
|
||||
HWTEST_F(BuiltInTests, givenInvalidAuxTranslationDirectionWhenBuildingDispatchInfosThenAbort) {
|
||||
BuiltinDispatchInfoBuilder &baseBuilder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);
|
||||
auto &builder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(baseBuilder);
|
||||
|
||||
MemObjsForAuxTranslation memObjsForAuxTranslation;
|
||||
MockBuffer mockBuffer;
|
||||
@ -389,7 +393,7 @@ TEST_F(BuiltInTests, givenInvalidAuxTranslationDirectionWhenBuildingDispatchInfo
|
||||
memObjsForAuxTranslation.insert(&mockBuffer);
|
||||
|
||||
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::None;
|
||||
EXPECT_THROW(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams), std::exception);
|
||||
EXPECT_THROW(builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams), std::exception);
|
||||
}
|
||||
|
||||
class MockAuxBuilInOp : public BuiltInOp<EBuiltInOps::AuxTranslation> {
|
||||
@ -411,7 +415,7 @@ TEST_F(BuiltInTests, whenAuxBuiltInIsConstructedThenResizeKernelInstancedTo5) {
|
||||
EXPECT_EQ(5u, mockAuxBuiltInOp.convertToNonAuxKernel.size());
|
||||
}
|
||||
|
||||
TEST_F(BuiltInTests, givenMoreBuffersForAuxTranslationThanKernelInstancesWhenDispatchingThenResize) {
|
||||
HWTEST_F(BuiltInTests, givenMoreBuffersForAuxTranslationThanKernelInstancesWhenDispatchingThenResize) {
|
||||
MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pContext, *pDevice);
|
||||
EXPECT_EQ(5u, mockAuxBuiltInOp.convertToAuxKernel.size());
|
||||
EXPECT_EQ(5u, mockAuxBuiltInOp.convertToNonAuxKernel.size());
|
||||
@ -428,7 +432,7 @@ TEST_F(BuiltInTests, givenMoreBuffersForAuxTranslationThanKernelInstancesWhenDis
|
||||
memObjsForAuxTranslation.insert(&buffer);
|
||||
}
|
||||
|
||||
EXPECT_TRUE(mockAuxBuiltInOp.buildDispatchInfos(multiDispatchInfo, builtinOpsParams));
|
||||
EXPECT_TRUE(mockAuxBuiltInOp.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams));
|
||||
EXPECT_EQ(7u, mockAuxBuiltInOp.convertToAuxKernel.size());
|
||||
EXPECT_EQ(7u, mockAuxBuiltInOp.convertToNonAuxKernel.size());
|
||||
}
|
||||
|
@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/built_ins/aux_translation_builtin.h"
|
||||
#include "runtime/command_queue/gpgpu_walker.h"
|
||||
#include "runtime/command_queue/hardware_interface.h"
|
||||
#include "runtime/event/perf_counter.h"
|
||||
@ -18,6 +19,7 @@
|
||||
#include "unit_tests/fixtures/device_fixture.h"
|
||||
#include "unit_tests/helpers/debug_manager_state_restore.h"
|
||||
#include "unit_tests/helpers/hw_parse.h"
|
||||
#include "unit_tests/mocks/mock_buffer.h"
|
||||
#include "unit_tests/mocks/mock_command_queue.h"
|
||||
#include "unit_tests/mocks/mock_graphics_allocation.h"
|
||||
#include "unit_tests/mocks/mock_kernel.h"
|
||||
@ -1266,7 +1268,13 @@ TEST(DispatchWalker, calculateDispatchDim) {
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxTranslationRequiredThenPipeControlWithStallAndDCFlushAdded) {
|
||||
HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxToNonAuxWhenTranslationRequiredThenPipeControlWithStallAndDCFlushAdded) {
|
||||
MockContext context;
|
||||
auto executionEnvironment = pDevice->getExecutionEnvironment();
|
||||
auto builtIns = executionEnvironment->getBuiltIns();
|
||||
BuiltinDispatchInfoBuilder &baseBuilder = builtIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, context, *pDevice);
|
||||
auto &builder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(baseBuilder);
|
||||
|
||||
MockKernel kernel(program.get(), kernelInfo, *pDevice);
|
||||
kernelInfo.workloadInfo.workDimOffset = 0;
|
||||
ASSERT_EQ(CL_SUCCESS, kernel.initialize());
|
||||
@ -1274,11 +1282,18 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxTranslationRequiredThenPipeContro
|
||||
auto &cmdStream = pCmdQ->getCS(0);
|
||||
void *buffer = cmdStream.getCpuBase();
|
||||
kernel.auxTranslationRequired = true;
|
||||
MockBuffer mockBuffer[2];
|
||||
|
||||
MockMultiDispatchInfo multiDispatchInfo(&kernel);
|
||||
DispatchInfo di1(&kernel, 1, Vec3<size_t>(1, 1, 1), Vec3<size_t>(1, 1, 1), Vec3<size_t>(0, 0, 0));
|
||||
di1.setPipeControlRequired(true);
|
||||
multiDispatchInfo.push(di1);
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
MemObjsForAuxTranslation memObjsForAuxTranslation;
|
||||
memObjsForAuxTranslation.insert(&mockBuffer[0]);
|
||||
memObjsForAuxTranslation.insert(&mockBuffer[1]);
|
||||
|
||||
BuiltinOpParams builtinOpsParams;
|
||||
builtinOpsParams.memObjsForAuxTranslation = &memObjsForAuxTranslation;
|
||||
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux;
|
||||
|
||||
builder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, builtinOpsParams);
|
||||
|
||||
HardwareInterface<FamilyType>::dispatchWalker(
|
||||
*pCmdQ,
|
||||
@ -1305,7 +1320,66 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxTranslationRequiredThenPipeContro
|
||||
EXPECT_TRUE(beginPipeControl->getCommandStreamerStallEnable());
|
||||
|
||||
auto endPipeControl = genCmdCast<typename FamilyType::PIPE_CONTROL *>(*(pipeControls[1]));
|
||||
EXPECT_FALSE(endPipeControl->getDcFlushEnable());
|
||||
bool dcFlushRequired = (executionEnvironment->getHardwareInfo()->platform.eRenderCoreFamily == IGFX_GEN8_CORE);
|
||||
EXPECT_EQ(dcFlushRequired, endPipeControl->getDcFlushEnable());
|
||||
EXPECT_TRUE(endPipeControl->getCommandStreamerStallEnable());
|
||||
}
|
||||
|
||||
// Builds NonAuxToAux aux-translation dispatches for two mock buffers, runs them through
// HardwareInterface<FamilyType>::dispatchWalker and checks the parsed command stream:
// exactly two PIPE_CONTROLs are expected, each with command streamer stall enabled and
// with DC flush enabled only when the render core family is IGFX_GEN8_CORE.
HWTEST_F(DispatchWalkerTest, givenKernelWhenNonAuxToAuxWhenTranslationRequiredThenPipeControlWithStallAdded) {
    MockContext context;
    auto executionEnvironment = pDevice->getExecutionEnvironment();
    auto builtIns = executionEnvironment->getBuiltIns();
    BuiltinDispatchInfoBuilder &genericBuilder = builtIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, context, *pDevice);
    auto &auxBuilder = static_cast<BuiltInOp<EBuiltInOps::AuxTranslation> &>(genericBuilder);

    MockKernel kernel(program.get(), kernelInfo, *pDevice);
    kernelInfo.workloadInfo.workDimOffset = 0;
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    auto &cmdStream = pCmdQ->getCS(0);
    void *streamBase = cmdStream.getCpuBase();
    kernel.auxTranslationRequired = true;
    MockBuffer mockBuffer[2];

    // Queue both buffers for translation.
    MultiDispatchInfo multiDispatchInfo;
    MemObjsForAuxTranslation memObjsForAuxTranslation;
    memObjsForAuxTranslation.insert(&mockBuffer[0]);
    memObjsForAuxTranslation.insert(&mockBuffer[1]);

    BuiltinOpParams translationParams;
    translationParams.memObjsForAuxTranslation = &memObjsForAuxTranslation;
    translationParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux;

    auxBuilder.buildDispatchInfosForAuxTranslation<FamilyType>(multiDispatchInfo, translationParams);

    HardwareInterface<FamilyType>::dispatchWalker(
        *pCmdQ,
        multiDispatchInfo,
        CsrDependencies(),
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        pDevice->getPreemptionMode(),
        false);

    // Parse what was written to the command stream and collect every PIPE_CONTROL.
    auto usedBytes = cmdStream.getUsed();
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, streamBase, usedBytes));

    auto pipeControls = findAll<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());

    ASSERT_EQ(2u, pipeControls.size());

    bool dcFlushRequired = (executionEnvironment->getHardwareInfo()->platform.eRenderCoreFamily == IGFX_GEN8_CORE);

    // Index 0 is the pipe control before the kernels, index 1 the one after them;
    // both carry the same expectations for this direction.
    for (size_t pipeControlIndex = 0; pipeControlIndex < 2u; ++pipeControlIndex) {
        auto pipeControl = genCmdCast<typename FamilyType::PIPE_CONTROL *>(*(pipeControls[pipeControlIndex]));
        EXPECT_EQ(dcFlushRequired, pipeControl->getDcFlushEnable());
        EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
    }
}
|
||||
|
||||
|
@ -655,6 +655,7 @@ struct EnqueueAuxKernelTests : public EnqueueKernelTest {
|
||||
template <typename FamilyType>
|
||||
class MyCmdQ : public CommandQueueHw<FamilyType> {
|
||||
public:
|
||||
using CommandQueueHw<FamilyType>::commandStream;
|
||||
MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
|
||||
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,
|
||||
AuxTranslationDirection auxTranslationDirection) override {
|
||||
@ -733,14 +734,19 @@ HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThe
|
||||
|
||||
EXPECT_EQ(&buffer2, *std::get<MemObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(0)).begin());
|
||||
EXPECT_EQ(&buffer2, *std::get<MemObjsForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(1)).begin());
|
||||
uint32_t pipeControlCount = 0;
|
||||
for (auto dispatchInfo : cmdQ.dispatchInfos) {
|
||||
if (dispatchInfo.isPipeControlRequired()) {
|
||||
++pipeControlCount;
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_EQ(4u, pipeControlCount);
|
||||
auto cmdStream = cmdQ.commandStream;
|
||||
auto sizeUsed = cmdStream->getUsed();
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), sizeUsed));
|
||||
|
||||
auto pipeControls = findAll<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
auto additionalPcCount = PipeControlHelper<FamilyType>::getSizeForPipeControlWithPostSyncOperation() / sizeof(typename FamilyType::PIPE_CONTROL);
|
||||
|
||||
// |AuxToNonAux|NDR|NonAuxToAux|
|
||||
ASSERT_EQ(4u + additionalPcCount, pipeControls.size());
|
||||
|
||||
ASSERT_EQ(2u, cmdQ.auxTranslationDirections.size());
|
||||
EXPECT_EQ(AuxTranslationDirection::AuxToNonAux, cmdQ.auxTranslationDirections[0]);
|
||||
EXPECT_EQ(AuxTranslationDirection::NonAuxToAux, cmdQ.auxTranslationDirections[1]);
|
||||
|
Reference in New Issue
Block a user