Aux translation [3/n]: Dispatch AuxTranslation builtin when required

Change-Id: I9bd0294de7980ac01ebb3c2d696eba6fd6a456ec
This commit is contained in:
Dunajski, Bartosz
2018-08-11 18:12:28 +02:00
committed by sys_ocldev
parent 93c1a7b51b
commit 6ca84c278a
10 changed files with 115 additions and 37 deletions

View File

@@ -50,11 +50,12 @@ bool BuiltInOp<HWFamily, EBuiltInOps::AuxTranslation>::buildDispatchInfos(MultiD
auto graphicsAllocation = buffer->getGraphicsAllocation();
size_t allocationSize = graphicsAllocation->getUnderlyingBufferSize();
if (operationParams.forceNonAuxMode) {
if (AuxTranslationDirection::AuxToNonAux == operationParams.auxTranslationDirection) {
builder.setKernel(convertToNonAuxKernel.at(kernelInstanceNumber++).get());
builder.setArg(0, buffer);
builder.setArgSvm(1, allocationSize, reinterpret_cast<void *>(graphicsAllocation->getGpuAddress()));
} else {
UNRECOVERABLE_IF(AuxTranslationDirection::NonAuxToAux != operationParams.auxTranslationDirection);
builder.setKernel(convertToAuxKernel.at(kernelInstanceNumber++).get());
builder.setArgSvm(0, allocationSize, reinterpret_cast<void *>(graphicsAllocation->getGpuAddress()));
builder.setArg(1, buffer);

View File

@@ -54,7 +54,7 @@ class BuiltinDispatchInfoBuilder {
GraphicsAllocation *srcSvmAlloc = nullptr;
GraphicsAllocation *dstSvmAlloc = nullptr;
const BuffersForAuxTranslation *buffersForAuxTranslation = nullptr;
bool forceNonAuxMode = false;
AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None;
Vec3<size_t> srcOffset = {0, 0, 0};
Vec3<size_t> dstOffset = {0, 0, 0};
Vec3<size_t> size = {0, 0, 0};

View File

@@ -20,7 +20,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "runtime/built_ins/sip.h"
#include "runtime/built_ins/builtins_dispatch_builder.h"
#include "runtime/command_queue/command_queue.h"
#include "runtime/command_queue/command_queue_hw.h"
#include "runtime/command_stream/command_stream_receiver.h"
@@ -594,4 +594,14 @@ void CommandQueue::releaseIndirectHeap(IndirectHeap::Type heapType) {
this->getDevice().getCommandStreamReceiver().releaseIndirectHeap(heapType);
}
// Requests aux translation dispatches for the given buffers.
//
// multiDispatchInfo         - container handed to the builtin builder, which builds its
//                             dispatch infos into it.
// buffersForAuxTranslation  - set of buffers to translate; passed to the builder by address,
//                             so it must stay alive for the duration of this call.
// auxTranslationDirection   - direction of the translation, forwarded unchanged to the builder.
//
// NOTE(review): the result of buildDispatchInfos() is not inspected here.
void CommandQueue::dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation,
                                          AuxTranslationDirection auxTranslationDirection) {
    // Describe the request first: which buffers, and in which direction.
    BuiltinDispatchInfoBuilder::BuiltinOpParams translationParams;
    translationParams.auxTranslationDirection = auxTranslationDirection;
    translationParams.buffersForAuxTranslation = &buffersForAuxTranslation;

    // Look up the AuxTranslation builtin builder for this queue's context and device,
    // then let it produce the dispatch infos.
    auto &auxTranslationBuilder = BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getContext(), getDevice());
    auxTranslationBuilder.buildDispatchInfos(multiDispatchInfo, translationParams);
}
} // namespace OCLRT

View File

@@ -415,7 +415,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType){};
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) {}
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation,
AuxTranslationDirection auxTranslationDirection);
Context *context;
Device *device;

View File

@@ -86,7 +86,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
BuffersForAuxTranslation buffersForAuxTranslation;
if (kernel->isAuxTranslationRequired()) {
kernel->fillWithBuffersForAuxTranslation(buffersForAuxTranslation);
dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation, AuxTranslationDirection::AuxToNonAux);
}
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
@@ -103,7 +103,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
}
}
if (kernel->isAuxTranslationRequired()) {
dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation, AuxTranslationDirection::NonAuxToAux);
}
}

View File

@@ -51,6 +51,12 @@ using MemObjSizeArray = std::array<size_t, 3>;
using MemObjOffsetArray = std::array<size_t, 3>;
using BuffersForAuxTranslation = std::unordered_set<Buffer *>;
// Direction of an aux translation pass.
// The enumerator values are spelled out explicitly; they match the implicit numbering.
enum class AuxTranslationDirection {
    None = 0,        // no direction selected (the value-initialized state)
    AuxToNonAux = 1, // translate from aux to non-aux
    NonAuxToAux = 2, // translate from non-aux back to aux
};
struct TransferProperties {
TransferProperties() = delete;

View File

@@ -413,6 +413,8 @@ cl_int Kernel::cloneKernel(Kernel *pSourceKernel) {
kernelSvmGfxAllocations.push_back(gfxAlloc);
}
this->isBuiltIn = pSourceKernel->isBuiltIn;
return CL_SUCCESS;
}

View File

@@ -271,7 +271,7 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTransla
BuiltinDispatchInfoBuilder::BuiltinOpParams builtinOpsParams;
builtinOpsParams.buffersForAuxTranslation = &buffersForAuxTranslation;
builtinOpsParams.forceNonAuxMode = true;
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux;
for (auto &buffer : mockBuffer) {
buffersForAuxTranslation.insert(&buffer);
@@ -319,7 +319,7 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslatio
BuiltinDispatchInfoBuilder::BuiltinOpParams builtinOpsParams;
builtinOpsParams.buffersForAuxTranslation = &buffersForAuxTranslation;
builtinOpsParams.forceNonAuxMode = false;
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux;
for (auto &buffer : mockBuffer) {
buffersForAuxTranslation.insert(&buffer);
@@ -369,10 +369,10 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickD
buffersForAuxTranslation.insert(&buffer);
}
builtinOpsParams.forceNonAuxMode = true;
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux;
EXPECT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams));
builtinOpsParams.forceNonAuxMode = false;
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux;
EXPECT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams));
EXPECT_EQ(6u, multiDispatchInfo.size());
@@ -388,6 +388,22 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickD
EXPECT_NE(builtinKernels[2], builtinKernels[5]);
}
// Building aux translation dispatch infos with the direction set to None must not succeed
// silently: the call is expected to throw.
TEST_F(BuiltInTests, givenInvalidAuxTranslationDirectionWhenBuildingDispatchInfosThenAbort) {
    auto &auxBuilder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);

    // A single buffer is enough to make the builder reach the direction check.
    BuffersForAuxTranslation buffersToTranslate;
    MockBuffer singleBuffer;
    MultiDispatchInfo dispatchInfos;
    buffersToTranslate.insert(&singleBuffer);

    BuiltinDispatchInfoBuilder::BuiltinOpParams params;
    params.auxTranslationDirection = AuxTranslationDirection::None; // deliberately invalid
    params.buffersForAuxTranslation = &buffersToTranslate;

    EXPECT_THROW(auxBuilder.buildDispatchInfos(dispatchInfos, params), std::exception);
}
template <typename Family>
class MockAuxBuilInOp : public BuiltInOp<Family, EBuiltInOps::AuxTranslation> {
public:
@@ -418,6 +434,7 @@ HWTEST_F(BuiltInTests, givenMoreBuffersForAuxTranslationThanKernelInstancesWhenD
MockBuffer mockBuffer[7];
builtinOpsParams.buffersForAuxTranslation = &buffersForAuxTranslation;
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux;
for (auto &buffer : mockBuffer) {
buffersForAuxTranslation.insert(&buffer);

View File

@@ -1565,45 +1565,49 @@ HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsD
EXPECT_FALSE(mockCsr->passedDispatchFlags.mediaSamplerRequired);
}
HWTEST_F(EnqueueKernelTest, givenKernelWithRequiredAuxTranslationWhenEnqueuedThenGuardKernelWithAuxTranslations) {
struct EnqueueAuxKernelTests : public EnqueueKernelTest {
template <typename FamilyType>
class MyCmdQ : public CommandQueueHw<FamilyType> {
public:
MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) override {
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
multiDispatchInfoSizes.push_back(multiDispatchInfo.size());
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation,
AuxTranslationDirection auxTranslationDirection) override {
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation, auxTranslationDirection);
Kernel *lastKernel = nullptr;
for (const auto &dispatchInfo : multiDispatchInfo) {
lastKernel = dispatchInfo.getKernel();
}
dispatchAuxTranslationInputs.emplace_back(lastKernel, multiDispatchInfo.size(), buffersForAuxTranslation, auxTranslationDirection);
}
std::vector<size_t> multiDispatchInfoSizes;
std::vector<std::tuple<Kernel *, size_t, BuffersForAuxTranslation, AuxTranslationDirection>> dispatchAuxTranslationInputs;
};
};
HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationWhenEnqueuedThenGuardKernelWithAuxTranslations) {
MockKernelWithInternals mockKernel(*pDevice, context);
MyCmdQ cmdQ(context, pDevice);
MyCmdQ<FamilyType> cmdQ(context, pDevice);
size_t gws[3] = {1, 0, 0};
mockKernel.mockKernel->auxTranslationRequired = true;
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(2u, cmdQ.multiDispatchInfoSizes.size());
EXPECT_EQ(0u, cmdQ.multiDispatchInfoSizes.at(0)); // before kernel
EXPECT_EQ(1u, cmdQ.multiDispatchInfoSizes.at(1)); // after kernel
EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size());
// before kernel
EXPECT_EQ(0u, std::get<size_t>(cmdQ.dispatchAuxTranslationInputs.at(0)));
EXPECT_EQ(AuxTranslationDirection::AuxToNonAux, std::get<AuxTranslationDirection>(cmdQ.dispatchAuxTranslationInputs.at(0)));
// after kernel
EXPECT_EQ(1u, std::get<size_t>(cmdQ.dispatchAuxTranslationInputs.at(1)));
EXPECT_EQ(AuxTranslationDirection::NonAuxToAux, std::get<AuxTranslationDirection>(cmdQ.dispatchAuxTranslationInputs.at(1)));
mockKernel.mockKernel->auxTranslationRequired = false;
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(2u, cmdQ.multiDispatchInfoSizes.size()); // not changed
EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size()); // not changed
}
HWTEST_F(EnqueueKernelTest, givenMultipleArgsWhenAuxTranslationIsRequiredThenPickOnlyApplicableBuffers) {
class MyCmdQ : public CommandQueueHw<FamilyType> {
public:
MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) override {
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
inputBuffersForAuxTranslation.push_back(buffersForAuxTranslation);
}
std::vector<BuffersForAuxTranslation> inputBuffersForAuxTranslation;
};
MyCmdQ cmdQ(context, pDevice);
HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThenPickOnlyApplicableBuffers) {
MyCmdQ<FamilyType> cmdQ(context, pDevice);
size_t gws[3] = {1, 0, 0};
MockBuffer buffer0, buffer1, buffer2, buffer3;
cl_mem clMem0 = &buffer0;
@@ -1638,10 +1642,41 @@ HWTEST_F(EnqueueKernelTest, givenMultipleArgsWhenAuxTranslationIsRequiredThenPic
mockKernel.mockKernel->kernelArguments.at(5).type = Kernel::kernelArgType::IMAGE_OBJ; // non-buffer arg - dont insert
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(2u, cmdQ.inputBuffersForAuxTranslation.size());
EXPECT_EQ(1u, cmdQ.inputBuffersForAuxTranslation[0].size()); // before kernel
EXPECT_EQ(1u, cmdQ.inputBuffersForAuxTranslation[1].size()); // after kernel
EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size());
EXPECT_EQ(1u, std::get<BuffersForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(0)).size()); // before kernel
EXPECT_EQ(1u, std::get<BuffersForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(1)).size()); // after kernel
EXPECT_EQ(&buffer2, *cmdQ.inputBuffersForAuxTranslation[0].begin());
EXPECT_EQ(&buffer2, *cmdQ.inputBuffersForAuxTranslation[1].begin());
EXPECT_EQ(&buffer2, *std::get<BuffersForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(0)).begin());
EXPECT_EQ(&buffer2, *std::get<BuffersForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(1)).begin());
}
// Enqueues a kernel that requires aux translation with one buffer argument and checks
// what the queue's dispatchAuxTranslation hook recorded for the call made before the
// kernel and the call made after it.
HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationWhenEnqueuedThenDispatchAuxTranslationBuiltin) {
MockKernelWithInternals mockKernel(*pDevice, context);
MyCmdQ<FamilyType> cmdQ(context, pDevice);
size_t gws[3] = {1, 0, 0};
MockBuffer buffer;
cl_mem clMem = &buffer;
// NOTE(review): presumably a compressed allocation is what makes this buffer eligible
// for aux translation - confirm against Kernel::fillWithBuffersForAuxTranslation.
buffer.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
// Describe a single kernel argument before initializing the mock kernel.
mockKernel.kernelInfo.kernelArgInfo.resize(1);
mockKernel.kernelInfo.kernelArgInfo.at(0).kernelArgPatchInfoVector.resize(1);
mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false;
mockKernel.mockKernel->initialize();
// The flag is set after initialize() and before the buffer argument is bound.
mockKernel.mockKernel->auxTranslationRequired = true;
mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem);
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
// One recorded entry per dispatchAuxTranslation call: one before and one after the kernel.
EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size());
// Entry 0: the call made before the kernel. The size_t element is multiDispatchInfo.size()
// at that point, and the Kernel * element is the kernel of the last dispatch info.
EXPECT_EQ(1u, std::get<size_t>(cmdQ.dispatchAuxTranslationInputs.at(0))); // aux before NDR
auto kernelBefore = std::get<Kernel *>(cmdQ.dispatchAuxTranslationInputs.at(0));
// NOTE(review): "fullCopy" is presumably the name of the aux translation builtin kernel - confirm.
EXPECT_EQ("fullCopy", kernelBefore->getKernelInfo().name);
EXPECT_TRUE(kernelBefore->isBuiltIn);
// Entry 1: the call made after the kernel. The last dispatch info must again be the builtin.
EXPECT_EQ(3u, std::get<size_t>(cmdQ.dispatchAuxTranslationInputs.at(1))); // aux + NDR + aux
auto kernelAfter = std::get<Kernel *>(cmdQ.dispatchAuxTranslationInputs.at(1));
EXPECT_EQ("fullCopy", kernelAfter->getKernelInfo().name);
EXPECT_TRUE(kernelAfter->isBuiltIn);
}

View File

@@ -541,3 +541,9 @@ TEST_F(CloneKernelTest, cloneKernelWithExecInfo) {
pContext->getSVMAllocsManager()->freeSVMAlloc(ptrSVM);
}
// Verifies that cloneKernel() carries the source kernel's isBuiltIn flag over to the clone.
TEST_F(CloneKernelTest, givenBuiltinSourceKernelWhenCloningThenSetBuiltinFlagToClonedKernel) {
// Mark the source as a builtin before cloning so the flag has something to propagate.
pSourceKernel->isBuiltIn = true;
// NOTE(review): the cl_int returned by cloneKernel() is not checked in this test.
pClonedKernel->cloneKernel(pSourceKernel);
EXPECT_TRUE(pClonedKernel->isBuiltIn);
}