mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
Aux translation [3/n]: Dispatch AuxTranslation builtin when required
Change-Id: I9bd0294de7980ac01ebb3c2d696eba6fd6a456ec
This commit is contained in:
committed by
sys_ocldev
parent
93c1a7b51b
commit
6ca84c278a
@@ -50,11 +50,12 @@ bool BuiltInOp<HWFamily, EBuiltInOps::AuxTranslation>::buildDispatchInfos(MultiD
|
||||
auto graphicsAllocation = buffer->getGraphicsAllocation();
|
||||
size_t allocationSize = graphicsAllocation->getUnderlyingBufferSize();
|
||||
|
||||
if (operationParams.forceNonAuxMode) {
|
||||
if (AuxTranslationDirection::AuxToNonAux == operationParams.auxTranslationDirection) {
|
||||
builder.setKernel(convertToNonAuxKernel.at(kernelInstanceNumber++).get());
|
||||
builder.setArg(0, buffer);
|
||||
builder.setArgSvm(1, allocationSize, reinterpret_cast<void *>(graphicsAllocation->getGpuAddress()));
|
||||
} else {
|
||||
UNRECOVERABLE_IF(AuxTranslationDirection::NonAuxToAux != operationParams.auxTranslationDirection);
|
||||
builder.setKernel(convertToAuxKernel.at(kernelInstanceNumber++).get());
|
||||
builder.setArgSvm(0, allocationSize, reinterpret_cast<void *>(graphicsAllocation->getGpuAddress()));
|
||||
builder.setArg(1, buffer);
|
||||
|
||||
@@ -54,7 +54,7 @@ class BuiltinDispatchInfoBuilder {
|
||||
GraphicsAllocation *srcSvmAlloc = nullptr;
|
||||
GraphicsAllocation *dstSvmAlloc = nullptr;
|
||||
const BuffersForAuxTranslation *buffersForAuxTranslation = nullptr;
|
||||
bool forceNonAuxMode = false;
|
||||
AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None;
|
||||
Vec3<size_t> srcOffset = {0, 0, 0};
|
||||
Vec3<size_t> dstOffset = {0, 0, 0};
|
||||
Vec3<size_t> size = {0, 0, 0};
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "runtime/built_ins/sip.h"
|
||||
#include "runtime/built_ins/builtins_dispatch_builder.h"
|
||||
#include "runtime/command_queue/command_queue.h"
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "runtime/command_stream/command_stream_receiver.h"
|
||||
@@ -594,4 +594,14 @@ void CommandQueue::releaseIndirectHeap(IndirectHeap::Type heapType) {
|
||||
this->getDevice().getCommandStreamReceiver().releaseIndirectHeap(heapType);
|
||||
}
|
||||
|
||||
// Asks the AuxTranslation built-in builder to build dispatch infos for the given buffers.
//
// multiDispatchInfo        - passed through to builder.buildDispatchInfos().
// buffersForAuxTranslation - set of buffers to translate; only its address is stored in the
//                            params, so it has to stay alive for the duration of this call.
// auxTranslationDirection  - direction forwarded to the builder via BuiltinOpParams.
//
// NOTE(review): the value returned by buildDispatchInfos() is not checked here — confirm
// that ignoring it is intended.
void CommandQueue::dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation,
|
||||
AuxTranslationDirection auxTranslationDirection) {
|
||||
// Look up the builder registered for EBuiltInOps::AuxTranslation for this queue's context and device.
auto &builder = BuiltIns::getInstance().getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getContext(), getDevice());
|
||||
BuiltinDispatchInfoBuilder::BuiltinOpParams dispatchParams;
|
||||
|
||||
// Only these two fields are set; every other BuiltinOpParams member keeps its default.
dispatchParams.buffersForAuxTranslation = &buffersForAuxTranslation;
|
||||
dispatchParams.auxTranslationDirection = auxTranslationDirection;
|
||||
|
||||
builder.buildDispatchInfos(multiDispatchInfo, dispatchParams);
|
||||
}
|
||||
} // namespace OCLRT
|
||||
|
||||
@@ -415,7 +415,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
|
||||
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType){};
|
||||
|
||||
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) {}
|
||||
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation,
|
||||
AuxTranslationDirection auxTranslationDirection);
|
||||
|
||||
Context *context;
|
||||
Device *device;
|
||||
|
||||
@@ -86,7 +86,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
||||
BuffersForAuxTranslation buffersForAuxTranslation;
|
||||
if (kernel->isAuxTranslationRequired()) {
|
||||
kernel->fillWithBuffersForAuxTranslation(buffersForAuxTranslation);
|
||||
dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
|
||||
dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation, AuxTranslationDirection::AuxToNonAux);
|
||||
}
|
||||
|
||||
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
|
||||
@@ -103,7 +103,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
||||
}
|
||||
}
|
||||
if (kernel->isAuxTranslationRequired()) {
|
||||
dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
|
||||
dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation, AuxTranslationDirection::NonAuxToAux);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -51,6 +51,12 @@ using MemObjSizeArray = std::array<size_t, 3>;
|
||||
using MemObjOffsetArray = std::array<size_t, 3>;
|
||||
using BuffersForAuxTranslation = std::unordered_set<Buffer *>;
|
||||
|
||||
// Direction of an aux translation pass requested from the AuxTranslation built-in.
enum class AuxTranslationDirection {
|
||||
// Default of BuiltinOpParams::auxTranslationDirection; the AuxTranslation builder rejects it
// (UNRECOVERABLE_IF in buildDispatchInfos when the direction is neither of the two below).
None,
|
||||
// Requested by enqueueHandler ahead of the kernel when the kernel requires aux translation.
AuxToNonAux,
|
||||
// Requested by enqueueHandler once more after the kernel when the kernel requires aux translation.
NonAuxToAux
|
||||
};
|
||||
|
||||
struct TransferProperties {
|
||||
TransferProperties() = delete;
|
||||
|
||||
|
||||
@@ -413,6 +413,8 @@ cl_int Kernel::cloneKernel(Kernel *pSourceKernel) {
|
||||
kernelSvmGfxAllocations.push_back(gfxAlloc);
|
||||
}
|
||||
|
||||
this->isBuiltIn = pSourceKernel->isBuiltIn;
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
@@ -271,7 +271,7 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTransla
|
||||
|
||||
BuiltinDispatchInfoBuilder::BuiltinOpParams builtinOpsParams;
|
||||
builtinOpsParams.buffersForAuxTranslation = &buffersForAuxTranslation;
|
||||
builtinOpsParams.forceNonAuxMode = true;
|
||||
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux;
|
||||
|
||||
for (auto &buffer : mockBuffer) {
|
||||
buffersForAuxTranslation.insert(&buffer);
|
||||
@@ -319,7 +319,7 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslatio
|
||||
|
||||
BuiltinDispatchInfoBuilder::BuiltinOpParams builtinOpsParams;
|
||||
builtinOpsParams.buffersForAuxTranslation = &buffersForAuxTranslation;
|
||||
builtinOpsParams.forceNonAuxMode = false;
|
||||
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux;
|
||||
|
||||
for (auto &buffer : mockBuffer) {
|
||||
buffersForAuxTranslation.insert(&buffer);
|
||||
@@ -369,10 +369,10 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickD
|
||||
buffersForAuxTranslation.insert(&buffer);
|
||||
}
|
||||
|
||||
builtinOpsParams.forceNonAuxMode = true;
|
||||
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux;
|
||||
EXPECT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams));
|
||||
|
||||
builtinOpsParams.forceNonAuxMode = false;
|
||||
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux;
|
||||
EXPECT_TRUE(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams));
|
||||
|
||||
EXPECT_EQ(6u, multiDispatchInfo.size());
|
||||
@@ -388,6 +388,22 @@ TEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPickD
|
||||
EXPECT_NE(builtinKernels[2], builtinKernels[5]);
|
||||
}
|
||||
|
||||
// Verifies that the AuxTranslation builder refuses AuxTranslationDirection::None:
// buildDispatchInfos() is expected to throw when asked to translate a buffer without a
// valid direction.
TEST_F(BuiltInTests, givenInvalidAuxTranslationDirectionWhenBuildingDispatchInfosThenAbort) {
|
||||
BuiltinDispatchInfoBuilder &builder = pBuiltIns->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, *pContext, *pDevice);
|
||||
|
||||
BuffersForAuxTranslation buffersForAuxTranslation;
|
||||
MockBuffer mockBuffer;
|
||||
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
BuiltinDispatchInfoBuilder::BuiltinOpParams builtinOpsParams;
|
||||
builtinOpsParams.buffersForAuxTranslation = &buffersForAuxTranslation;
|
||||
|
||||
// A single buffer is inserted so the set handed to the builder is not empty.
buffersForAuxTranslation.insert(&mockBuffer);
|
||||
|
||||
// None is also the default value of the field; it is set explicitly to state the intent.
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::None;
|
||||
EXPECT_THROW(builder.buildDispatchInfos(multiDispatchInfo, builtinOpsParams), std::exception);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
class MockAuxBuilInOp : public BuiltInOp<Family, EBuiltInOps::AuxTranslation> {
|
||||
public:
|
||||
@@ -418,6 +434,7 @@ HWTEST_F(BuiltInTests, givenMoreBuffersForAuxTranslationThanKernelInstancesWhenD
|
||||
MockBuffer mockBuffer[7];
|
||||
|
||||
builtinOpsParams.buffersForAuxTranslation = &buffersForAuxTranslation;
|
||||
builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux;
|
||||
|
||||
for (auto &buffer : mockBuffer) {
|
||||
buffersForAuxTranslation.insert(&buffer);
|
||||
|
||||
@@ -1565,45 +1565,49 @@ HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsD
|
||||
EXPECT_FALSE(mockCsr->passedDispatchFlags.mediaSamplerRequired);
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueKernelTest, givenKernelWithRequiredAuxTranslationWhenEnqueuedThenGuardKernelWithAuxTranslations) {
|
||||
struct EnqueueAuxKernelTests : public EnqueueKernelTest {
|
||||
template <typename FamilyType>
|
||||
class MyCmdQ : public CommandQueueHw<FamilyType> {
|
||||
public:
|
||||
MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
|
||||
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) override {
|
||||
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
|
||||
multiDispatchInfoSizes.push_back(multiDispatchInfo.size());
|
||||
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation,
|
||||
AuxTranslationDirection auxTranslationDirection) override {
|
||||
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation, auxTranslationDirection);
|
||||
Kernel *lastKernel = nullptr;
|
||||
for (const auto &dispatchInfo : multiDispatchInfo) {
|
||||
lastKernel = dispatchInfo.getKernel();
|
||||
}
|
||||
dispatchAuxTranslationInputs.emplace_back(lastKernel, multiDispatchInfo.size(), buffersForAuxTranslation, auxTranslationDirection);
|
||||
}
|
||||
|
||||
std::vector<size_t> multiDispatchInfoSizes;
|
||||
std::vector<std::tuple<Kernel *, size_t, BuffersForAuxTranslation, AuxTranslationDirection>> dispatchAuxTranslationInputs;
|
||||
};
|
||||
};
|
||||
|
||||
HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationWhenEnqueuedThenGuardKernelWithAuxTranslations) {
|
||||
MockKernelWithInternals mockKernel(*pDevice, context);
|
||||
MyCmdQ cmdQ(context, pDevice);
|
||||
MyCmdQ<FamilyType> cmdQ(context, pDevice);
|
||||
size_t gws[3] = {1, 0, 0};
|
||||
|
||||
mockKernel.mockKernel->auxTranslationRequired = true;
|
||||
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(2u, cmdQ.multiDispatchInfoSizes.size());
|
||||
EXPECT_EQ(0u, cmdQ.multiDispatchInfoSizes.at(0)); // before kernel
|
||||
EXPECT_EQ(1u, cmdQ.multiDispatchInfoSizes.at(1)); // after kernel
|
||||
EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size());
|
||||
|
||||
// before kernel
|
||||
EXPECT_EQ(0u, std::get<size_t>(cmdQ.dispatchAuxTranslationInputs.at(0)));
|
||||
EXPECT_EQ(AuxTranslationDirection::AuxToNonAux, std::get<AuxTranslationDirection>(cmdQ.dispatchAuxTranslationInputs.at(0)));
|
||||
|
||||
// after kernel
|
||||
EXPECT_EQ(1u, std::get<size_t>(cmdQ.dispatchAuxTranslationInputs.at(1)));
|
||||
EXPECT_EQ(AuxTranslationDirection::NonAuxToAux, std::get<AuxTranslationDirection>(cmdQ.dispatchAuxTranslationInputs.at(1)));
|
||||
|
||||
mockKernel.mockKernel->auxTranslationRequired = false;
|
||||
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(2u, cmdQ.multiDispatchInfoSizes.size()); // not changed
|
||||
EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size()); // not changed
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueKernelTest, givenMultipleArgsWhenAuxTranslationIsRequiredThenPickOnlyApplicableBuffers) {
|
||||
class MyCmdQ : public CommandQueueHw<FamilyType> {
|
||||
public:
|
||||
MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
|
||||
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) override {
|
||||
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
|
||||
inputBuffersForAuxTranslation.push_back(buffersForAuxTranslation);
|
||||
}
|
||||
|
||||
std::vector<BuffersForAuxTranslation> inputBuffersForAuxTranslation;
|
||||
};
|
||||
MyCmdQ cmdQ(context, pDevice);
|
||||
HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThenPickOnlyApplicableBuffers) {
|
||||
MyCmdQ<FamilyType> cmdQ(context, pDevice);
|
||||
size_t gws[3] = {1, 0, 0};
|
||||
MockBuffer buffer0, buffer1, buffer2, buffer3;
|
||||
cl_mem clMem0 = &buffer0;
|
||||
@@ -1638,10 +1642,41 @@ HWTEST_F(EnqueueKernelTest, givenMultipleArgsWhenAuxTranslationIsRequiredThenPic
|
||||
mockKernel.mockKernel->kernelArguments.at(5).type = Kernel::kernelArgType::IMAGE_OBJ; // non-buffer arg - dont insert
|
||||
|
||||
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(2u, cmdQ.inputBuffersForAuxTranslation.size());
|
||||
EXPECT_EQ(1u, cmdQ.inputBuffersForAuxTranslation[0].size()); // before kernel
|
||||
EXPECT_EQ(1u, cmdQ.inputBuffersForAuxTranslation[1].size()); // after kernel
|
||||
EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size());
|
||||
EXPECT_EQ(1u, std::get<BuffersForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(0)).size()); // before kernel
|
||||
EXPECT_EQ(1u, std::get<BuffersForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(1)).size()); // after kernel
|
||||
|
||||
EXPECT_EQ(&buffer2, *cmdQ.inputBuffersForAuxTranslation[0].begin());
|
||||
EXPECT_EQ(&buffer2, *cmdQ.inputBuffersForAuxTranslation[1].begin());
|
||||
EXPECT_EQ(&buffer2, *std::get<BuffersForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(0)).begin());
|
||||
EXPECT_EQ(&buffer2, *std::get<BuffersForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(1)).begin());
|
||||
}
|
||||
|
||||
// Enqueues a kernel that requires aux translation with one compressed buffer argument and
// checks, through the inputs recorded by MyCmdQ::dispatchAuxTranslation, that a built-in
// "fullCopy" kernel is the last dispatch both before and after the user kernel.
HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationWhenEnqueuedThenDispatchAuxTranslationBuiltin) {
|
||||
MockKernelWithInternals mockKernel(*pDevice, context);
|
||||
MyCmdQ<FamilyType> cmdQ(context, pDevice);
|
||||
size_t gws[3] = {1, 0, 0};
|
||||
MockBuffer buffer;
|
||||
cl_mem clMem = &buffer;
|
||||
|
||||
// Kernel setup: one argument, non pure-stateful access, bound to the compressed buffer.
// NOTE(review): presumably this combination is what makes the buffer eligible for aux
// translation — confirm against Kernel::fillWithBuffersForAuxTranslation.
buffer.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
|
||||
mockKernel.kernelInfo.kernelArgInfo.resize(1);
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(0).kernelArgPatchInfoVector.resize(1);
|
||||
mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false;
|
||||
mockKernel.mockKernel->initialize();
|
||||
mockKernel.mockKernel->auxTranslationRequired = true;
|
||||
mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem);
|
||||
|
||||
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
// One recorded call per direction is expected.
EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size());
|
||||
|
||||
// before kernel
|
||||
// The size_t tuple element is multiDispatchInfo.size() captured right after the base-class call.
EXPECT_EQ(1u, std::get<size_t>(cmdQ.dispatchAuxTranslationInputs.at(0))); // aux before NDR
|
||||
// The Kernel* tuple element is the kernel of the last dispatch info at that moment.
auto kernelBefore = std::get<Kernel *>(cmdQ.dispatchAuxTranslationInputs.at(0));
|
||||
EXPECT_EQ("fullCopy", kernelBefore->getKernelInfo().name);
|
||||
EXPECT_TRUE(kernelBefore->isBuiltIn);
|
||||
|
||||
// after kernel
|
||||
EXPECT_EQ(3u, std::get<size_t>(cmdQ.dispatchAuxTranslationInputs.at(1))); // aux + NDR + aux
|
||||
auto kernelAfter = std::get<Kernel *>(cmdQ.dispatchAuxTranslationInputs.at(1));
|
||||
EXPECT_EQ("fullCopy", kernelAfter->getKernelInfo().name);
|
||||
EXPECT_TRUE(kernelAfter->isBuiltIn);
|
||||
}
|
||||
|
||||
@@ -541,3 +541,9 @@ TEST_F(CloneKernelTest, cloneKernelWithExecInfo) {
|
||||
|
||||
pContext->getSVMAllocsManager()->freeSVMAlloc(ptrSVM);
|
||||
}
|
||||
|
||||
// Verifies that Kernel::cloneKernel() copies the isBuiltIn flag from the source kernel
// to the kernel being cloned into.
TEST_F(CloneKernelTest, givenBuiltinSourceKernelWhenCloningThenSetBuiltinFlagToClonedKernel) {
|
||||
pSourceKernel->isBuiltIn = true;
|
||||
pClonedKernel->cloneKernel(pSourceKernel);
|
||||
EXPECT_TRUE(pClonedKernel->isBuiltIn);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user