From b4f53fdfa75d821d0e1dedd2bf53cbee80385174 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Tue, 7 Aug 2018 15:09:16 +0200 Subject: [PATCH] Pick applicable buffers for aux translation Change-Id: I60a28cd9e0dec61120b1ae5c42dfe0cb852eb387 --- runtime/command_queue/command_queue.h | 2 +- runtime/command_queue/enqueue_common.h | 6 +- runtime/helpers/base_object.h | 6 ++ runtime/helpers/properties_helper.h | 3 + runtime/kernel/kernel.cpp | 12 ++++ runtime/kernel/kernel.h | 4 ++ .../command_queue/enqueue_kernel_tests.cpp | 58 ++++++++++++++++++- unit_tests/mocks/mock_kernel.h | 1 + 8 files changed, 87 insertions(+), 5 deletions(-) diff --git a/runtime/command_queue/command_queue.h b/runtime/command_queue/command_queue.h index d114053d70..a8a3daa794 100644 --- a/runtime/command_queue/command_queue.h +++ b/runtime/command_queue/command_queue.h @@ -415,7 +415,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType){}; - MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo) {} + MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) {} Context *context; Device *device; diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h index 37f9be5857..dfd9a6fa6a 100644 --- a/runtime/command_queue/enqueue_common.h +++ b/runtime/command_queue/enqueue_common.h @@ -83,8 +83,10 @@ void CommandQueueHw::enqueueHandler(Surface *(&surfaces)[surfaceCount if (DebugManager.flags.ForceDispatchScheduler.get()) { forceDispatchScheduler(multiDispatchInfo); } else { + BuffersForAuxTranslation buffersForAuxTranslation; if (kernel->isAuxTranslationRequired()) { - dispatchAuxTranslation(multiDispatchInfo); + kernel->fillWithBuffersForAuxTranslation(buffersForAuxTranslation); + dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation); } if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) { @@ -101,7 +103,7 @@ void CommandQueueHw::enqueueHandler(Surface *(&surfaces)[surfaceCount } } if (kernel->isAuxTranslationRequired()) { - dispatchAuxTranslation(multiDispatchInfo); + dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation); } } diff --git a/runtime/helpers/base_object.h b/runtime/helpers/base_object.h index 515bd4d43a..4ea1e05e11 100644 --- a/runtime/helpers/base_object.h +++ b/runtime/helpers/base_object.h @@ -77,6 +77,12 @@ inline const DerivedType *castToObject(const typename DerivedType::BaseType *obj return const_cast(castToObject(const_cast(object))); } +template +inline DerivedType *castToObject(const void *object) { + cl_mem clMem = const_cast(static_cast(object)); + return castToObject(clMem); +} + extern std::thread::id invalidThreadID; class ConditionVariableWithCounter { diff --git a/runtime/helpers/properties_helper.h b/runtime/helpers/properties_helper.h index 35841d36fb..8a42c7ce79 100644 --- a/runtime/helpers/properties_helper.h +++ b/runtime/helpers/properties_helper.h @@ -24,9 +24,11 @@ #include "runtime/api/cl_types.h" #include +#include namespace OCLRT { class MemObj; +class Buffer; enum class QueueThrottle { LOW, @@ -47,6 +49,7 @@ struct EventsRequest { using MemObjSizeArray = std::array; using MemObjOffsetArray = std::array; +using BuffersForAuxTranslation = std::unordered_set; struct TransferProperties { TransferProperties() = delete; diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp index 52a07f24a9..43aefd2d63 100644 --- a/runtime/kernel/kernel.cpp +++ b/runtime/kernel/kernel.cpp @@ -2103,4 +2103,16 @@ void Kernel::resolveArgs() { bool Kernel::canTransformImages() const { return device.getHardwareInfo().pPlatform->eRenderCoreFamily >= IGFX_GEN9_CORE; } + +void Kernel::fillWithBuffersForAuxTranslation(BuffersForAuxTranslation &buffersForAuxTranslation) { + buffersForAuxTranslation.reserve(getKernelArgsNumber()); + for (uint32_t i = 0; i < getKernelArgsNumber(); i++) { + if (BUFFER_OBJ == kernelArguments.at(i).type && !kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess) { + auto buffer = castToObject(getKernelArg(i)); + if (buffer && buffer->getGraphicsAllocation()->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) { + buffersForAuxTranslation.insert(buffer); + } + } + } +} } // namespace OCLRT diff --git a/runtime/kernel/kernel.h b/runtime/kernel/kernel.h index 0b6bd14be3..37d6e86a09 100644 --- a/runtime/kernel/kernel.h +++ b/runtime/kernel/kernel.h @@ -27,6 +27,7 @@ #include "runtime/helpers/base_object.h" #include "runtime/helpers/preamble.h" #include "runtime/helpers/address_patch.h" +#include "runtime/helpers/properties_helper.h" #include "runtime/program/program.h" #include "runtime/program/kernel_info.h" #include "runtime/os_interface/debug_settings_manager.h" @@ -34,6 +35,7 @@ namespace OCLRT { struct CompletionStamp; +class Buffer; class GraphicsAllocation; class ImageTransformer; class Surface; @@ -384,6 +386,8 @@ class Kernel : public BaseObject<_cl_kernel> { return usingImagesOnly; } + void fillWithBuffersForAuxTranslation(BuffersForAuxTranslation &buffersForAuxTranslation); + protected: struct ObjectCounts { uint32_t imageCount; diff --git a/unit_tests/command_queue/enqueue_kernel_tests.cpp b/unit_tests/command_queue/enqueue_kernel_tests.cpp index 7504745b1f..77ca4ff6e5 100644 --- a/unit_tests/command_queue/enqueue_kernel_tests.cpp +++ b/unit_tests/command_queue/enqueue_kernel_tests.cpp @@ -1573,8 +1573,8 @@ HWTEST_F(EnqueueKernelTest, givenKernelWithRequiredAuxTranslationWhenEnqueuedThe class MyCmdQ : public CommandQueueHw { public: MyCmdQ(Context *context, Device *device) : CommandQueueHw(context, device, nullptr) {} - void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo) override { - CommandQueueHw::dispatchAuxTranslation(multiDispatchInfo); + void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) override { + CommandQueueHw::dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation); multiDispatchInfoSizes.push_back(multiDispatchInfo.size()); } @@ -1595,3 +1595,57 @@ HWTEST_F(EnqueueKernelTest, givenKernelWithRequiredAuxTranslationWhenEnqueuedThe cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(2u, cmdQ.multiDispatchInfoSizes.size()); // not changed } + +HWTEST_F(EnqueueKernelTest, givenMultipleArgsWhenAuxTranslationIsRequiredThenPickOnlyApplicableBuffers) { + class MyCmdQ : public CommandQueueHw { + public: + MyCmdQ(Context *context, Device *device) : CommandQueueHw(context, device, nullptr) {} + void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) override { + CommandQueueHw::dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation); + inputBuffersForAuxTranslation.push_back(buffersForAuxTranslation); + } + + std::vector inputBuffersForAuxTranslation; + }; + MyCmdQ cmdQ(context, pDevice); + size_t gws[3] = {1, 0, 0}; + MockBuffer buffer0, buffer1, buffer2, buffer3; + cl_mem clMem0 = &buffer0; + cl_mem clMem1 = &buffer1; + cl_mem clMem2 = &buffer2; + cl_mem clMem3 = &buffer3; + buffer0.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER); + buffer1.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER); + buffer2.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); + buffer3.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); + + MockKernelWithInternals mockKernel(*pDevice, context); + mockKernel.mockKernel->auxTranslationRequired = true; + mockKernel.kernelInfo.kernelArgInfo.resize(6); + for (auto &kernelInfo : mockKernel.kernelInfo.kernelArgInfo) { + kernelInfo.kernelArgPatchInfoVector.resize(1); + } + + mockKernel.mockKernel->initialize(); + mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false; + mockKernel.kernelInfo.kernelArgInfo.at(1).pureStatefulBufferAccess = true; + mockKernel.kernelInfo.kernelArgInfo.at(2).pureStatefulBufferAccess = false; + mockKernel.kernelInfo.kernelArgInfo.at(3).pureStatefulBufferAccess = true; + mockKernel.kernelInfo.kernelArgInfo.at(4).pureStatefulBufferAccess = false; + mockKernel.kernelInfo.kernelArgInfo.at(5).pureStatefulBufferAccess = false; + + mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem0); // stateless on regular buffer - dont insert + mockKernel.mockKernel->setArgBuffer(1, sizeof(cl_mem *), &clMem1); // stateful on regular buffer - dont insert + mockKernel.mockKernel->setArgBuffer(2, sizeof(cl_mem *), &clMem2); // stateless on BUFFER_COMPRESSED - insert + mockKernel.mockKernel->setArgBuffer(3, sizeof(cl_mem *), &clMem3); // stateful on BUFFER_COMPRESSED - dont insert + mockKernel.mockKernel->setArgBuffer(4, sizeof(cl_mem *), nullptr); // nullptr - dont insert + mockKernel.mockKernel->kernelArguments.at(5).type = Kernel::kernelArgType::IMAGE_OBJ; // non-buffer arg - dont insert + + cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(2u, cmdQ.inputBuffersForAuxTranslation.size()); + EXPECT_EQ(1u, cmdQ.inputBuffersForAuxTranslation[0].size()); // before kernel + EXPECT_EQ(1u, cmdQ.inputBuffersForAuxTranslation[1].size()); // after kernel + + EXPECT_EQ(&buffer2, *cmdQ.inputBuffersForAuxTranslation[0].begin()); + EXPECT_EQ(&buffer2, *cmdQ.inputBuffersForAuxTranslation[1].begin()); +} diff --git a/unit_tests/mocks/mock_kernel.h b/unit_tests/mocks/mock_kernel.h index 9eb96efba6..b51d829ff8 100644 --- a/unit_tests/mocks/mock_kernel.h +++ b/unit_tests/mocks/mock_kernel.h @@ -38,6 +38,7 @@ namespace OCLRT { class MockKernel : public Kernel { public: using Kernel::auxTranslationRequired; + using Kernel::kernelArguments; struct BlockPatchValues { uint64_t offset;