Pick applicable buffers for aux translation
Change-Id: I60a28cd9e0dec61120b1ae5c42dfe0cb852eb387
This commit is contained in:
parent
428fdb4ed4
commit
b4f53fdfa7
|
@ -415,7 +415,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||||
|
|
||||||
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType){};
|
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType){};
|
||||||
|
|
||||||
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo) {}
|
// Base-class hook for aux translation dispatch; the body here is intentionally empty.
// Takes the dispatch info to extend and the set of buffers selected for aux translation.
// NOTE(review): presumably CommandQueueHw supplies the real implementation - confirm.
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) {}
|
||||||
|
|
||||||
Context *context;
|
Context *context;
|
||||||
Device *device;
|
Device *device;
|
||||||
|
|
|
@ -83,8 +83,10 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
||||||
if (DebugManager.flags.ForceDispatchScheduler.get()) {
|
if (DebugManager.flags.ForceDispatchScheduler.get()) {
|
||||||
forceDispatchScheduler(multiDispatchInfo);
|
forceDispatchScheduler(multiDispatchInfo);
|
||||||
} else {
|
} else {
|
||||||
|
BuffersForAuxTranslation buffersForAuxTranslation;
|
||||||
if (kernel->isAuxTranslationRequired()) {
|
if (kernel->isAuxTranslationRequired()) {
|
||||||
dispatchAuxTranslation(multiDispatchInfo);
|
kernel->fillWithBuffersForAuxTranslation(buffersForAuxTranslation);
|
||||||
|
dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
|
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
|
||||||
|
@ -101,7 +103,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (kernel->isAuxTranslationRequired()) {
|
if (kernel->isAuxTranslationRequired()) {
|
||||||
dispatchAuxTranslation(multiDispatchInfo);
|
dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -77,6 +77,12 @@ inline const DerivedType *castToObject(const typename DerivedType::BaseType *obj
|
||||||
return const_cast<const DerivedType *>(castToObject<DerivedType>(const_cast<typename DerivedType::BaseType *>(object)));
|
return const_cast<const DerivedType *>(castToObject<DerivedType>(const_cast<typename DerivedType::BaseType *>(object)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Overload of castToObject for an untyped pointer: the pointer is reinterpreted as a
// cl_mem handle and forwarded to the castToObject<DerivedType> overload that accepts it.
// Returns whatever that overload returns for the resulting handle.
// NOTE(review): the input is always treated as _cl_mem regardless of DerivedType, so this
// looks intended only for cl_mem-based object types - confirm against callers.
template <typename DerivedType>
|
||||||
|
inline DerivedType *castToObject(const void *object) {
|
||||||
|
// static_cast gives the pointer its handle type; const_cast then drops constness.
cl_mem clMem = const_cast<cl_mem>(static_cast<const _cl_mem *>(object));
|
||||||
|
return castToObject<DerivedType>(clMem);
|
||||||
|
}
|
||||||
|
|
||||||
extern std::thread::id invalidThreadID;
|
extern std::thread::id invalidThreadID;
|
||||||
|
|
||||||
class ConditionVariableWithCounter {
|
class ConditionVariableWithCounter {
|
||||||
|
|
|
@ -24,9 +24,11 @@
|
||||||
|
|
||||||
#include "runtime/api/cl_types.h"
|
#include "runtime/api/cl_types.h"
|
||||||
#include <array>
|
#include <array>
|
||||||
|
#include <unordered_set>
|
||||||
|
|
||||||
namespace OCLRT {
|
namespace OCLRT {
|
||||||
class MemObj;
|
class MemObj;
|
||||||
|
class Buffer;
|
||||||
|
|
||||||
enum class QueueThrottle {
|
enum class QueueThrottle {
|
||||||
LOW,
|
LOW,
|
||||||
|
@ -47,6 +49,7 @@ struct EventsRequest {
|
||||||
|
|
||||||
using MemObjSizeArray = std::array<size_t, 3>;
|
using MemObjSizeArray = std::array<size_t, 3>;
|
||||||
using MemObjOffsetArray = std::array<size_t, 3>;
|
using MemObjOffsetArray = std::array<size_t, 3>;
|
||||||
|
using BuffersForAuxTranslation = std::unordered_set<Buffer *>;
|
||||||
|
|
||||||
struct TransferProperties {
|
struct TransferProperties {
|
||||||
TransferProperties() = delete;
|
TransferProperties() = delete;
|
||||||
|
|
|
@ -2103,4 +2103,16 @@ void Kernel::resolveArgs() {
|
||||||
bool Kernel::canTransformImages() const {
|
bool Kernel::canTransformImages() const {
|
||||||
return device.getHardwareInfo().pPlatform->eRenderCoreFamily >= IGFX_GEN9_CORE;
|
return device.getHardwareInfo().pPlatform->eRenderCoreFamily >= IGFX_GEN9_CORE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Collects the kernel's buffer arguments that qualify for aux translation into
// buffersForAuxTranslation. An argument is inserted only when all of these hold:
//  - its recorded argument type is BUFFER_OBJ,
//  - its kernelArgInfo entry is not marked pureStatefulBufferAccess,
//  - it resolves to a non-null Buffer through castToObject<Buffer>,
//  - the buffer's graphics allocation has type BUFFER_COMPRESSED.
// The function only inserts; anything already present in the container is left in place.
void Kernel::fillWithBuffersForAuxTranslation(BuffersForAuxTranslation &buffersForAuxTranslation) {
|
||||||
|
// Upper bound on the result size: at most one entry per kernel argument.
buffersForAuxTranslation.reserve(getKernelArgsNumber());
|
||||||
|
for (uint32_t i = 0; i < getKernelArgsNumber(); i++) {
|
||||||
|
// Skip non-buffer arguments and buffers accessed purely statefully.
if (BUFFER_OBJ == kernelArguments.at(i).type && !kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess) {
|
||||||
|
auto buffer = castToObject<Buffer>(getKernelArg(i));
|
||||||
|
// The null check covers arguments that do not resolve to a Buffer object.
if (buffer && buffer->getGraphicsAllocation()->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
|
||||||
|
buffersForAuxTranslation.insert(buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
} // namespace OCLRT
|
} // namespace OCLRT
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
#include "runtime/helpers/base_object.h"
|
#include "runtime/helpers/base_object.h"
|
||||||
#include "runtime/helpers/preamble.h"
|
#include "runtime/helpers/preamble.h"
|
||||||
#include "runtime/helpers/address_patch.h"
|
#include "runtime/helpers/address_patch.h"
|
||||||
|
#include "runtime/helpers/properties_helper.h"
|
||||||
#include "runtime/program/program.h"
|
#include "runtime/program/program.h"
|
||||||
#include "runtime/program/kernel_info.h"
|
#include "runtime/program/kernel_info.h"
|
||||||
#include "runtime/os_interface/debug_settings_manager.h"
|
#include "runtime/os_interface/debug_settings_manager.h"
|
||||||
|
@ -34,6 +35,7 @@
|
||||||
|
|
||||||
namespace OCLRT {
|
namespace OCLRT {
|
||||||
struct CompletionStamp;
|
struct CompletionStamp;
|
||||||
|
class Buffer;
|
||||||
class GraphicsAllocation;
|
class GraphicsAllocation;
|
||||||
class ImageTransformer;
|
class ImageTransformer;
|
||||||
class Surface;
|
class Surface;
|
||||||
|
@ -384,6 +386,8 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||||
return usingImagesOnly;
|
return usingImagesOnly;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void fillWithBuffersForAuxTranslation(BuffersForAuxTranslation &buffersForAuxTranslation);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
struct ObjectCounts {
|
struct ObjectCounts {
|
||||||
uint32_t imageCount;
|
uint32_t imageCount;
|
||||||
|
|
|
@ -1573,8 +1573,8 @@ HWTEST_F(EnqueueKernelTest, givenKernelWithRequiredAuxTranslationWhenEnqueuedThe
|
||||||
class MyCmdQ : public CommandQueueHw<FamilyType> {
|
class MyCmdQ : public CommandQueueHw<FamilyType> {
|
||||||
public:
|
public:
|
||||||
MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
|
MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
|
||||||
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo) override {
|
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) override {
|
||||||
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo);
|
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
|
||||||
multiDispatchInfoSizes.push_back(multiDispatchInfo.size());
|
multiDispatchInfoSizes.push_back(multiDispatchInfo.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1595,3 +1595,57 @@ HWTEST_F(EnqueueKernelTest, givenKernelWithRequiredAuxTranslationWhenEnqueuedThe
|
||||||
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||||
EXPECT_EQ(2u, cmdQ.multiDispatchInfoSizes.size()); // not changed
|
EXPECT_EQ(2u, cmdQ.multiDispatchInfoSizes.size()); // not changed
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Enqueues a kernel with six arguments (four buffers, one null buffer argument, one image-typed
// argument) while aux translation is required, and checks that dispatchAuxTranslation is
// invoked twice, each time with a set containing only buffer2.
HWTEST_F(EnqueueKernelTest, givenMultipleArgsWhenAuxTranslationIsRequiredThenPickOnlyApplicableBuffers) {
|
||||||
|
// Command queue that records a copy of the buffer set passed to every dispatchAuxTranslation call.
class MyCmdQ : public CommandQueueHw<FamilyType> {
|
||||||
|
public:
|
||||||
|
MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
|
||||||
|
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, BuffersForAuxTranslation &buffersForAuxTranslation) override {
|
||||||
|
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation);
|
||||||
|
inputBuffersForAuxTranslation.push_back(buffersForAuxTranslation);
|
||||||
|
}
|
||||||
|
|
||||||
|
// One entry per dispatchAuxTranslation call, in call order.
std::vector<BuffersForAuxTranslation> inputBuffersForAuxTranslation;
|
||||||
|
};
|
||||||
|
MyCmdQ cmdQ(context, pDevice);
|
||||||
|
size_t gws[3] = {1, 0, 0};
|
||||||
|
MockBuffer buffer0, buffer1, buffer2, buffer3;
|
||||||
|
cl_mem clMem0 = &buffer0;
|
||||||
|
cl_mem clMem1 = &buffer1;
|
||||||
|
cl_mem clMem2 = &buffer2;
|
||||||
|
cl_mem clMem3 = &buffer3;
|
||||||
|
// buffer0 and buffer1 use regular allocations; buffer2 and buffer3 are marked compressed.
buffer0.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
|
||||||
|
buffer1.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
|
||||||
|
buffer2.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
|
||||||
|
buffer3.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
|
||||||
|
|
||||||
|
MockKernelWithInternals mockKernel(*pDevice, context);
|
||||||
|
mockKernel.mockKernel->auxTranslationRequired = true;
|
||||||
|
mockKernel.kernelInfo.kernelArgInfo.resize(6);
|
||||||
|
for (auto &kernelInfo : mockKernel.kernelInfo.kernelArgInfo) {
|
||||||
|
kernelInfo.kernelArgPatchInfoVector.resize(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
mockKernel.mockKernel->initialize();
|
||||||
|
// Per-argument access mode: args 1 and 3 are purely stateful, the rest are not.
mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false;
|
||||||
|
mockKernel.kernelInfo.kernelArgInfo.at(1).pureStatefulBufferAccess = true;
|
||||||
|
mockKernel.kernelInfo.kernelArgInfo.at(2).pureStatefulBufferAccess = false;
|
||||||
|
mockKernel.kernelInfo.kernelArgInfo.at(3).pureStatefulBufferAccess = true;
|
||||||
|
mockKernel.kernelInfo.kernelArgInfo.at(4).pureStatefulBufferAccess = false;
|
||||||
|
mockKernel.kernelInfo.kernelArgInfo.at(5).pureStatefulBufferAccess = false;
|
||||||
|
|
||||||
|
mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem0); // stateless on regular buffer - dont insert
|
||||||
|
mockKernel.mockKernel->setArgBuffer(1, sizeof(cl_mem *), &clMem1); // stateful on regular buffer - dont insert
|
||||||
|
mockKernel.mockKernel->setArgBuffer(2, sizeof(cl_mem *), &clMem2); // stateless on BUFFER_COMPRESSED - insert
|
||||||
|
mockKernel.mockKernel->setArgBuffer(3, sizeof(cl_mem *), &clMem3); // stateful on BUFFER_COMPRESSED - dont insert
|
||||||
|
mockKernel.mockKernel->setArgBuffer(4, sizeof(cl_mem *), nullptr); // nullptr - dont insert
|
||||||
|
mockKernel.mockKernel->kernelArguments.at(5).type = Kernel::kernelArgType::IMAGE_OBJ; // non-buffer arg - dont insert
|
||||||
|
|
||||||
|
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||||
|
// Two recorded calls are expected for the single enqueue, each carrying exactly one buffer.
EXPECT_EQ(2u, cmdQ.inputBuffersForAuxTranslation.size());
|
||||||
|
EXPECT_EQ(1u, cmdQ.inputBuffersForAuxTranslation[0].size()); // before kernel
|
||||||
|
EXPECT_EQ(1u, cmdQ.inputBuffersForAuxTranslation[1].size()); // after kernel
|
||||||
|
|
||||||
|
// Only the stateless, compressed buffer (buffer2) may appear in either set.
EXPECT_EQ(&buffer2, *cmdQ.inputBuffersForAuxTranslation[0].begin());
|
||||||
|
EXPECT_EQ(&buffer2, *cmdQ.inputBuffersForAuxTranslation[1].begin());
|
||||||
|
}
|
||||||
|
|
|
@ -38,6 +38,7 @@ namespace OCLRT {
|
||||||
class MockKernel : public Kernel {
|
class MockKernel : public Kernel {
|
||||||
public:
|
public:
|
||||||
using Kernel::auxTranslationRequired;
|
using Kernel::auxTranslationRequired;
|
||||||
|
using Kernel::kernelArguments;
|
||||||
|
|
||||||
struct BlockPatchValues {
|
struct BlockPatchValues {
|
||||||
uint64_t offset;
|
uint64_t offset;
|
||||||
|
|
Loading…
Reference in New Issue