Pick applicable buffers for aux translation

Change-Id: I60a28cd9e0dec61120b1ae5c42dfe0cb852eb387
This commit is contained in:
Dunajski, Bartosz 2018-08-07 15:09:16 +02:00 committed by sys_ocldev
parent 428fdb4ed4
commit b4f53fdfa7
8 changed files with 87 additions and 5 deletions

View File

@ -415,7 +415,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
// Overridable hook; this base implementation has an empty body, so none of the
// reference arguments (taskLevel, numEventsInWaitList, eventWaitList, blockQueue) are modified here.
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType){};
// Aux-translation dispatch hook taking only the dispatch info; the body here is empty (no-op).
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo) {}
// Aux-translation dispatch hook that additionally receives a BuffersForAuxTranslation set;
// the body here is empty (no-op).
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) {}
Context *context;
Device *device;

View File

@ -83,8 +83,10 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
if (DebugManager.flags.ForceDispatchScheduler.get()) {
forceDispatchScheduler(multiDispatchInfo);
} else {
BuffersForAuxTranslation buffersForAuxTranslation;
if (kernel->isAuxTranslationRequired()) {
dispatchAuxTranslation(multiDispatchInfo);
kernel->fillWithBuffersForAuxTranslation(buffersForAuxTranslation);
dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
}
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
@ -101,7 +103,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
}
}
if (kernel->isAuxTranslationRequired()) {
dispatchAuxTranslation(multiDispatchInfo);
dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
}
}

View File

@ -77,6 +77,12 @@ inline const DerivedType *castToObject(const typename DerivedType::BaseType *obj
return const_cast<const DerivedType *>(castToObject<DerivedType>(const_cast<typename DerivedType::BaseType *>(object)));
}
template <typename DerivedType>
inline DerivedType *castToObject(const void *object) {
cl_mem clMem = const_cast<cl_mem>(static_cast<const _cl_mem *>(object));
return castToObject<DerivedType>(clMem);
}
extern std::thread::id invalidThreadID;
class ConditionVariableWithCounter {

View File

@ -24,9 +24,11 @@
#include "runtime/api/cl_types.h"
#include <array>
#include <unordered_set>
namespace OCLRT {
class MemObj;
class Buffer;
enum class QueueThrottle {
LOW,
@ -47,6 +49,7 @@ struct EventsRequest {
// Fixed-size array of three size_t values.
using MemObjSizeArray = std::array<size_t, 3>;
// Fixed-size array of three size_t values.
using MemObjOffsetArray = std::array<size_t, 3>;
// Unordered set of non-owning Buffer pointers; a given buffer can appear at most once.
using BuffersForAuxTranslation = std::unordered_set<Buffer *>;
struct TransferProperties {
TransferProperties() = delete;

View File

@ -2103,4 +2103,16 @@ void Kernel::resolveArgs() {
// Returns true when the device's render core family is IGFX_GEN9_CORE or a higher enum value.
bool Kernel::canTransformImages() const {
    const auto renderCoreFamily = device.getHardwareInfo().pPlatform->eRenderCoreFamily;
    return renderCoreFamily >= IGFX_GEN9_CORE;
}
// Inserts into buffersForAuxTranslation every kernel argument that satisfies all of:
//  - its argument type is BUFFER_OBJ,
//  - its kernelArgInfo entry is not marked pureStatefulBufferAccess,
//  - it resolves to a non-null Buffer whose graphics allocation type is BUFFER_COMPRESSED.
// The set is only added to; existing entries are left in place.
void Kernel::fillWithBuffersForAuxTranslation(BuffersForAuxTranslation &buffersForAuxTranslation) {
    // Upper bound: at most one entry per kernel argument.
    buffersForAuxTranslation.reserve(getKernelArgsNumber());

    for (uint32_t argIndex = 0; argIndex < getKernelArgsNumber(); argIndex++) {
        if (kernelArguments.at(argIndex).type != BUFFER_OBJ) {
            continue;
        }
        if (kernelInfo.kernelArgInfo.at(argIndex).pureStatefulBufferAccess) {
            continue;
        }

        auto candidate = castToObject<Buffer>(getKernelArg(argIndex));
        if (!candidate) {
            continue;
        }

        const auto allocationType = candidate->getGraphicsAllocation()->getAllocationType();
        if (allocationType == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
            buffersForAuxTranslation.insert(candidate);
        }
    }
}
} // namespace OCLRT

View File

@ -27,6 +27,7 @@
#include "runtime/helpers/base_object.h"
#include "runtime/helpers/preamble.h"
#include "runtime/helpers/address_patch.h"
#include "runtime/helpers/properties_helper.h"
#include "runtime/program/program.h"
#include "runtime/program/kernel_info.h"
#include "runtime/os_interface/debug_settings_manager.h"
@ -34,6 +35,7 @@
namespace OCLRT {
struct CompletionStamp;
class Buffer;
class GraphicsAllocation;
class ImageTransformer;
class Surface;
@ -384,6 +386,8 @@ class Kernel : public BaseObject<_cl_kernel> {
return usingImagesOnly;
}
// Adds to buffersForAuxTranslation the buffer arguments of this kernel that are not
// accessed purely statefully and whose allocation type is BUFFER_COMPRESSED.
void fillWithBuffersForAuxTranslation(BuffersForAuxTranslation &buffersForAuxTranslation);
protected:
struct ObjectCounts {
uint32_t imageCount;

View File

@ -1573,8 +1573,8 @@ HWTEST_F(EnqueueKernelTest, givenKernelWithRequiredAuxTranslationWhenEnqueuedThe
class MyCmdQ : public CommandQueueHw<FamilyType> {
public:
MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo) override {
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo);
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) override {
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
multiDispatchInfoSizes.push_back(multiDispatchInfo.size());
}
@ -1595,3 +1595,57 @@ HWTEST_F(EnqueueKernelTest, givenKernelWithRequiredAuxTranslationWhenEnqueuedThe
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(2u, cmdQ.multiDispatchInfoSizes.size()); // not changed
}
// Enqueues a kernel with six arguments of mixed kinds and checks that only the
// stateless-accessed BUFFER_COMPRESSED buffer is handed to dispatchAuxTranslation,
// both for the call made before the kernel and for the one made after it.
HWTEST_F(EnqueueKernelTest, givenMultipleArgsWhenAuxTranslationIsRequiredThenPickOnlyApplicableBuffers) {
    // Command queue that records a copy of the buffer set passed to each dispatchAuxTranslation call.
    class MyCmdQ : public CommandQueueHw<FamilyType> {
      public:
        MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
        void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) override {
            CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
            inputBuffersForAuxTranslation.push_back(buffersForAuxTranslation);
        }

        std::vector<BuffersForAuxTranslation> inputBuffersForAuxTranslation;
    };

    MyCmdQ cmdQ(context, pDevice);
    size_t gws[3] = {1, 0, 0};

    MockBuffer buffer0, buffer1, buffer2, buffer3;
    cl_mem clMem0 = &buffer0;
    cl_mem clMem1 = &buffer1;
    cl_mem clMem2 = &buffer2;
    cl_mem clMem3 = &buffer3;

    buffer0.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
    buffer1.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
    buffer2.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
    buffer3.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);

    MockKernelWithInternals mockKernel(*pDevice, context);
    mockKernel.mockKernel->auxTranslationRequired = true;
    mockKernel.kernelInfo.kernelArgInfo.resize(6);
    for (auto &argInfo : mockKernel.kernelInfo.kernelArgInfo) {
        argInfo.kernelArgPatchInfoVector.resize(1);
    }
    mockKernel.mockKernel->initialize();

    mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false;
    mockKernel.kernelInfo.kernelArgInfo.at(1).pureStatefulBufferAccess = true;
    mockKernel.kernelInfo.kernelArgInfo.at(2).pureStatefulBufferAccess = false;
    mockKernel.kernelInfo.kernelArgInfo.at(3).pureStatefulBufferAccess = true;
    mockKernel.kernelInfo.kernelArgInfo.at(4).pureStatefulBufferAccess = false;
    mockKernel.kernelInfo.kernelArgInfo.at(5).pureStatefulBufferAccess = false;

    // The argument value is a cl_mem handle, so its size is sizeof(cl_mem).
    mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem), &clMem0);                      // stateless on regular buffer - don't insert
    mockKernel.mockKernel->setArgBuffer(1, sizeof(cl_mem), &clMem1);                      // stateful on regular buffer - don't insert
    mockKernel.mockKernel->setArgBuffer(2, sizeof(cl_mem), &clMem2);                      // stateless on BUFFER_COMPRESSED - insert
    mockKernel.mockKernel->setArgBuffer(3, sizeof(cl_mem), &clMem3);                      // stateful on BUFFER_COMPRESSED - don't insert
    mockKernel.mockKernel->setArgBuffer(4, sizeof(cl_mem), nullptr);                      // nullptr - don't insert
    mockKernel.mockKernel->kernelArguments.at(5).type = Kernel::kernelArgType::IMAGE_OBJ; // non-buffer arg - don't insert

    cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    // Size checks are fatal: the statements below index the vector and dereference begin(),
    // which would be invalid if any of these expectations did not hold.
    ASSERT_EQ(2u, cmdQ.inputBuffersForAuxTranslation.size());
    ASSERT_EQ(1u, cmdQ.inputBuffersForAuxTranslation[0].size()); // before kernel
    ASSERT_EQ(1u, cmdQ.inputBuffersForAuxTranslation[1].size()); // after kernel

    EXPECT_EQ(&buffer2, *cmdQ.inputBuffersForAuxTranslation[0].begin());
    EXPECT_EQ(&buffer2, *cmdQ.inputBuffersForAuxTranslation[1].begin());
}

View File

@ -38,6 +38,7 @@ namespace OCLRT {
class MockKernel : public Kernel {
public:
using Kernel::auxTranslationRequired;
using Kernel::kernelArguments;
struct BlockPatchValues {
uint64_t offset;