Pick applicable buffers for aux translation
Change-Id: I60a28cd9e0dec61120b1ae5c42dfe0cb852eb387
This commit is contained in:
parent
428fdb4ed4
commit
b4f53fdfa7
|
@ -415,7 +415,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
|||
|
||||
// Overridable hook whose body is empty in this class: as written here, none of the
// reference arguments (taskLevel, numEventsInWaitList, eventWaitList, blockQueue) is modified.
// NOTE(review): presumably derived queues override this to compute the real values - confirm.
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType){};
|
||||
|
||||
// Aux translation hook taking only the dispatch info; this implementation is an empty no-op.
// NOTE(review): MOCKABLE_VIRTUAL is a project macro not shown here - presumably makes the
// method virtual for test builds; confirm.
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo) {}
|
||||
// Aux translation hook that additionally receives the set of buffers selected for
// translation; this implementation is an empty no-op and touches neither argument.
// NOTE(review): MOCKABLE_VIRTUAL is a project macro not shown here - presumably makes the
// method virtual for test builds; confirm.
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) {}
|
||||
|
||||
Context *context;
|
||||
Device *device;
|
||||
|
|
|
@ -83,8 +83,10 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
|||
if (DebugManager.flags.ForceDispatchScheduler.get()) {
|
||||
forceDispatchScheduler(multiDispatchInfo);
|
||||
} else {
|
||||
BuffersForAuxTranslation buffersForAuxTranslation;
|
||||
if (kernel->isAuxTranslationRequired()) {
|
||||
dispatchAuxTranslation(multiDispatchInfo);
|
||||
kernel->fillWithBuffersForAuxTranslation(buffersForAuxTranslation);
|
||||
dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
|
||||
}
|
||||
|
||||
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
|
||||
|
@ -101,7 +103,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
|||
}
|
||||
}
|
||||
if (kernel->isAuxTranslationRequired()) {
|
||||
dispatchAuxTranslation(multiDispatchInfo);
|
||||
dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -77,6 +77,12 @@ inline const DerivedType *castToObject(const typename DerivedType::BaseType *obj
|
|||
return const_cast<const DerivedType *>(castToObject<DerivedType>(const_cast<typename DerivedType::BaseType *>(object)));
|
||||
}
|
||||
|
||||
// Casts an untyped pointer to an API handle into the runtime object DerivedType.
//
// The pointer is first reinterpreted as DerivedType::BaseType (the handle type that the
// typed castToObject overloads accept) and then forwarded to the typed overload.
// Using DerivedType::BaseType rather than a hard-coded cl_mem keeps the behavior for
// _cl_mem-based types unchanged, while making sure the typed overload is the one selected
// for every other DerivedType; with a fixed cl_mem the forwarded call could only match this
// const void * overload again and would recurse without end.
//
// The caller is responsible for passing a pointer that really refers to a
// DerivedType::BaseType handle (or nullptr).
template <typename DerivedType>
inline DerivedType *castToObject(const void *object) {
    using BaseType = typename DerivedType::BaseType;
    auto baseObject = const_cast<BaseType *>(static_cast<const BaseType *>(object));
    return castToObject<DerivedType>(baseObject);
}
|
||||
|
||||
extern std::thread::id invalidThreadID;
|
||||
|
||||
class ConditionVariableWithCounter {
|
||||
|
|
|
@ -24,9 +24,11 @@
|
|||
|
||||
#include "runtime/api/cl_types.h"
|
||||
#include <array>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace OCLRT {
|
||||
class MemObj;
|
||||
class Buffer;
|
||||
|
||||
enum class QueueThrottle {
|
||||
LOW,
|
||||
|
@ -47,6 +49,7 @@ struct EventsRequest {
|
|||
|
||||
using MemObjSizeArray = std::array<size_t, 3>;
|
||||
using MemObjOffsetArray = std::array<size_t, 3>;
|
||||
// Collection of buffers selected for aux translation. Being a set of pointers, a given
// Buffer is stored at most once even if it is inserted several times.
using BuffersForAuxTranslation = std::unordered_set<Buffer *>;
|
||||
|
||||
struct TransferProperties {
|
||||
TransferProperties() = delete;
|
||||
|
|
|
@ -2103,4 +2103,16 @@ void Kernel::resolveArgs() {
|
|||
bool Kernel::canTransformImages() const {
|
||||
return device.getHardwareInfo().pPlatform->eRenderCoreFamily >= IGFX_GEN9_CORE;
|
||||
}
|
||||
|
||||
// Inserts into buffersForAuxTranslation every kernel argument that
//   - is of type BUFFER_OBJ,
//   - is not flagged as pureStatefulBufferAccess in the kernel arg info,
//   - resolves to a non-null Buffer, and
//   - is backed by an allocation of type BUFFER_COMPRESSED.
// Existing content of the set is kept; capacity is reserved for the number of kernel args.
void Kernel::fillWithBuffersForAuxTranslation(BuffersForAuxTranslation &buffersForAuxTranslation) {
    buffersForAuxTranslation.reserve(getKernelArgsNumber());

    for (uint32_t argIndex = 0; argIndex < getKernelArgsNumber(); argIndex++) {
        // Only buffer arguments are candidates.
        if (BUFFER_OBJ != kernelArguments.at(argIndex).type) {
            continue;
        }
        // Arguments accessed purely statefully are left out.
        if (kernelInfo.kernelArgInfo.at(argIndex).pureStatefulBufferAccess) {
            continue;
        }

        auto candidate = castToObject<Buffer>(getKernelArg(argIndex));
        if (!candidate) {
            continue; // argument was set to nullptr or is not a valid Buffer
        }

        const auto allocationType = candidate->getGraphicsAllocation()->getAllocationType();
        if (allocationType == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
            buffersForAuxTranslation.insert(candidate);
        }
    }
}
|
||||
} // namespace OCLRT
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "runtime/helpers/base_object.h"
|
||||
#include "runtime/helpers/preamble.h"
|
||||
#include "runtime/helpers/address_patch.h"
|
||||
#include "runtime/helpers/properties_helper.h"
|
||||
#include "runtime/program/program.h"
|
||||
#include "runtime/program/kernel_info.h"
|
||||
#include "runtime/os_interface/debug_settings_manager.h"
|
||||
|
@ -34,6 +35,7 @@
|
|||
|
||||
namespace OCLRT {
|
||||
struct CompletionStamp;
|
||||
class Buffer;
|
||||
class GraphicsAllocation;
|
||||
class ImageTransformer;
|
||||
class Surface;
|
||||
|
@ -384,6 +386,8 @@ class Kernel : public BaseObject<_cl_kernel> {
|
|||
return usingImagesOnly;
|
||||
}
|
||||
|
||||
// Adds to buffersForAuxTranslation the buffer arguments of this kernel that are not
// accessed purely statefully and whose allocation type is BUFFER_COMPRESSED.
void fillWithBuffersForAuxTranslation(BuffersForAuxTranslation &buffersForAuxTranslation);
|
||||
|
||||
protected:
|
||||
struct ObjectCounts {
|
||||
uint32_t imageCount;
|
||||
|
|
|
@ -1573,8 +1573,8 @@ HWTEST_F(EnqueueKernelTest, givenKernelWithRequiredAuxTranslationWhenEnqueuedThe
|
|||
class MyCmdQ : public CommandQueueHw<FamilyType> {
|
||||
public:
|
||||
MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
|
||||
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo) override {
|
||||
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo);
|
||||
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) override {
|
||||
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
|
||||
multiDispatchInfoSizes.push_back(multiDispatchInfo.size());
|
||||
}
|
||||
|
||||
|
@ -1595,3 +1595,57 @@ HWTEST_F(EnqueueKernelTest, givenKernelWithRequiredAuxTranslationWhenEnqueuedThe
|
|||
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(2u, cmdQ.multiDispatchInfoSizes.size()); // not changed
|
||||
}
|
||||
|
||||
// Verifies that, out of several kernel arguments, only the buffer that is accessed
// statelessly AND backed by a BUFFER_COMPRESSED allocation is passed to
// dispatchAuxTranslation(), both for the dispatch before and the one after the kernel.
HWTEST_F(EnqueueKernelTest, givenMultipleArgsWhenAuxTranslationIsRequiredThenPickOnlyApplicableBuffers) {
    // Command queue that records a copy of the buffer set given to each aux translation dispatch.
    class MyCmdQ : public CommandQueueHw<FamilyType> {
      public:
        MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
        void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) override {
            CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
            inputBuffersForAuxTranslation.push_back(buffersForAuxTranslation);
        }

        std::vector<BuffersForAuxTranslation> inputBuffersForAuxTranslation;
    };

    MyCmdQ cmdQ(context, pDevice);
    size_t gws[3] = {1, 0, 0};

    MockBuffer buffer0, buffer1, buffer2, buffer3;
    cl_mem clMem0 = &buffer0;
    cl_mem clMem1 = &buffer1;
    cl_mem clMem2 = &buffer2;
    cl_mem clMem3 = &buffer3;
    buffer0.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
    buffer1.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
    buffer2.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
    buffer3.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);

    MockKernelWithInternals mockKernel(*pDevice, context);
    mockKernel.mockKernel->auxTranslationRequired = true;
    mockKernel.kernelInfo.kernelArgInfo.resize(6);
    for (auto &argInfo : mockKernel.kernelInfo.kernelArgInfo) {
        argInfo.kernelArgPatchInfoVector.resize(1);
    }

    mockKernel.mockKernel->initialize();
    mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false;
    mockKernel.kernelInfo.kernelArgInfo.at(1).pureStatefulBufferAccess = true;
    mockKernel.kernelInfo.kernelArgInfo.at(2).pureStatefulBufferAccess = false;
    mockKernel.kernelInfo.kernelArgInfo.at(3).pureStatefulBufferAccess = true;
    mockKernel.kernelInfo.kernelArgInfo.at(4).pureStatefulBufferAccess = false;
    mockKernel.kernelInfo.kernelArgInfo.at(5).pureStatefulBufferAccess = false;

    // The argument value is a cl_mem handle, so its size is sizeof(cl_mem).
    mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem), &clMem0);                      // stateless on regular buffer - dont insert
    mockKernel.mockKernel->setArgBuffer(1, sizeof(cl_mem), &clMem1);                      // stateful on regular buffer - dont insert
    mockKernel.mockKernel->setArgBuffer(2, sizeof(cl_mem), &clMem2);                      // stateless on BUFFER_COMPRESSED - insert
    mockKernel.mockKernel->setArgBuffer(3, sizeof(cl_mem), &clMem3);                      // stateful on BUFFER_COMPRESSED - dont insert
    mockKernel.mockKernel->setArgBuffer(4, sizeof(cl_mem), nullptr);                      // nullptr - dont insert
    mockKernel.mockKernel->kernelArguments.at(5).type = Kernel::kernelArgType::IMAGE_OBJ; // non-buffer arg - dont insert

    cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    // ASSERT (not EXPECT): the checks below index the vector and dereference begin(),
    // which would be invalid if these counts did not hold.
    ASSERT_EQ(2u, cmdQ.inputBuffersForAuxTranslation.size());
    ASSERT_EQ(1u, cmdQ.inputBuffersForAuxTranslation[0].size()); // before kernel
    ASSERT_EQ(1u, cmdQ.inputBuffersForAuxTranslation[1].size()); // after kernel

    EXPECT_EQ(&buffer2, *cmdQ.inputBuffersForAuxTranslation[0].begin());
    EXPECT_EQ(&buffer2, *cmdQ.inputBuffersForAuxTranslation[1].begin());
}
|
||||
|
|
|
@ -38,6 +38,7 @@ namespace OCLRT {
|
|||
class MockKernel : public Kernel {
|
||||
public:
|
||||
using Kernel::auxTranslationRequired;
|
||||
using Kernel::kernelArguments;
|
||||
|
||||
struct BlockPatchValues {
|
||||
uint64_t offset;
|
||||
|
|
Loading…
Reference in New Issue