Aux translation [1/n]

- Mark Kernel for aux translation
- Initial implementation of dispatchAuxTranslation for future use

Change-Id: Ifca1c9a893876eecc5678cdc4f564b2bfcae959a
This commit is contained in:
Dunajski, Bartosz
2018-08-03 08:14:43 +02:00
committed by sys_ocldev
parent 72740fc048
commit ec6f0f9f86
7 changed files with 78 additions and 33 deletions

View File

@@ -415,6 +415,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType){};
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo) {}
Context *context;
Device *device;

View File

@@ -83,6 +83,10 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
if (DebugManager.flags.ForceDispatchScheduler.get()) {
forceDispatchScheduler(multiDispatchInfo);
} else {
if (kernel->isAuxTranslationRequired()) {
dispatchAuxTranslation(multiDispatchInfo);
}
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::WalkerSplit> builder;
builder.setDispatchGeometry(workDim, workItems, localWorkSizesIn, globalOffsets);
@@ -96,6 +100,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
return;
}
}
if (kernel->isAuxTranslationRequired()) {
dispatchAuxTranslation(multiDispatchInfo);
}
}
enqueueHandler<commandType>(surfaces, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event);

View File

@@ -98,7 +98,6 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, const Devic
kernelInfo(kernelInfoArg),
numberOfBindingTableStates(0),
localBindingTableOffset(0),
pSshLocal(nullptr),
sshLocalSize(0),
crossThreadData(nullptr),
crossThreadDataSize(0),
@@ -111,9 +110,6 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, const Devic
}
Kernel::~Kernel() {
delete[] pSshLocal;
pSshLocal = nullptr;
delete[] crossThreadData;
crossThreadData = nullptr;
crossThreadDataSize = 0;
@@ -245,10 +241,10 @@ cl_int Kernel::initialize() {
: 0;
if (sshLocalSize) {
pSshLocal = new char[sshLocalSize];
pSshLocal = std::make_unique<char[]>(sshLocalSize);
// copy the ssh into our local copy
memcpy_s(pSshLocal, sshLocalSize, heapInfo.pSsh, sshLocalSize);
memcpy_s(pSshLocal.get(), sshLocalSize, heapInfo.pSsh, sshLocalSize);
}
numberOfBindingTableStates = (patchInfo.bindingTableState != nullptr) ? patchInfo.bindingTableState->Count : 0;
localBindingTableOffset = (patchInfo.bindingTableState != nullptr) ? patchInfo.bindingTableState->Offset : 0;
@@ -334,6 +330,8 @@ cl_int Kernel::initialize() {
} else if ((argInfo.typeStr.find("*") != std::string::npos) || argInfo.isBuffer) {
kernelArgHandlers[i] = &Kernel::setArgBuffer;
kernelArguments[i].type = BUFFER_OBJ;
this->auxTranslationRequired |= !kernelInfo.kernelArgInfo[i].pureStatefulBufferAccess &&
getDevice().getHardwareInfo().capabilityTable.ftrRenderCompressedBuffers;
} else if (argInfo.isImage) {
kernelArgHandlers[i] = &Kernel::setArgImage;
kernelArguments[i].type = IMAGE_OBJ;
@@ -734,7 +732,7 @@ void Kernel::setStartOffset(uint32_t offset) {
const void *Kernel::getSurfaceStateHeap() const {
return kernelInfo.usesSsh
? pSshLocal
? pSshLocal.get()
: nullptr;
}
@@ -761,8 +759,7 @@ size_t Kernel::getNumberOfBindingTableStates() const {
}
void Kernel::resizeSurfaceStateHeap(void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset) {
delete[] pSshLocal;
pSshLocal = reinterpret_cast<char *>(pNewSsh);
pSshLocal.reset(reinterpret_cast<char *>(pNewSsh));
sshLocalSize = static_cast<uint32_t>(newSshSize);
numberOfBindingTableStates = newBindingTableCount;
localBindingTableOffset = newBindingTableOffset;

View File

@@ -115,6 +115,8 @@ class Kernel : public BaseObject<_cl_kernel> {
return kernelArg == BUFFER_OBJ || kernelArg == IMAGE_OBJ || kernelArg == PIPE_OBJ;
}
bool isAuxTranslationRequired() const { return auxTranslationRequired; }
char *getCrossThreadData() const {
return crossThreadData;
}
@@ -475,7 +477,7 @@ class Kernel : public BaseObject<_cl_kernel> {
size_t numberOfBindingTableStates;
size_t localBindingTableOffset;
char *pSshLocal;
std::unique_ptr<char[]> pSshLocal;
uint32_t sshLocalSize;
char *crossThreadData;
@@ -487,6 +489,7 @@ class Kernel : public BaseObject<_cl_kernel> {
GraphicsAllocation *kernelReflectionSurface;
bool usingSharedObjArgs;
bool auxTranslationRequired = false;
uint32_t patchedArgumentsNum = 0;
uint32_t startOffset = 0;

View File

@@ -22,6 +22,7 @@
#include "runtime/built_ins/built_ins.h"
#include "runtime/built_ins/builtins_dispatch_builder.h"
#include "runtime/command_queue/command_queue_hw.h"
#include "reg_configs_common.h"
#include "runtime/helpers/preamble.h"
#include "runtime/memory_manager/graphics_allocation.h"
@@ -1567,3 +1568,30 @@ HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsD
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_FALSE(mockCsr->passedDispatchFlags.mediaSamplerRequired);
}
HWTEST_F(EnqueueKernelTest, givenKernelWithRequiredAuxTranslationWhenEnqueuedThenGuardKernelWithAuxTranslations) {
class MyCmdQ : public CommandQueueHw<FamilyType> {
public:
MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo) override {
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo);
multiDispatchInfoSizes.push_back(multiDispatchInfo.size());
}
std::vector<size_t> multiDispatchInfoSizes;
};
MockKernelWithInternals mockKernel(*pDevice, context);
MyCmdQ cmdQ(context, pDevice);
size_t gws[3] = {1, 0, 0};
mockKernel.mockKernel->auxTranslationRequired = true;
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(2u, cmdQ.multiDispatchInfoSizes.size());
EXPECT_EQ(0u, cmdQ.multiDispatchInfoSizes.at(0)); // before kernel
EXPECT_EQ(1u, cmdQ.multiDispatchInfoSizes.at(1)); // after kernel
mockKernel.mockKernel->auxTranslationRequired = false;
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(2u, cmdQ.multiDispatchInfoSizes.size()); // not changed
}

View File

@@ -2211,3 +2211,25 @@ TEST(KernelTest, givenKernelWithKernelInfoWith64bitPointerSizeThenReport64bit) {
EXPECT_FALSE(kernel->is32Bit());
}
TEST(KernelTest, givenFtrRenderCompressedBuffersWhenInitializingArgsWithNonStatefulAccessThenMarkKernelForAuxTranslation) {
HardwareInfo localHwInfo = *platformDevices[0];
auto device = std::unique_ptr<Device>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&localHwInfo));
MockKernelWithInternals kernel(*device);
kernel.kernelInfo.kernelArgInfo.resize(1);
kernel.kernelInfo.kernelArgInfo.at(0).typeStr = "char *";
localHwInfo.capabilityTable.ftrRenderCompressedBuffers = false;
kernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = true;
kernel.mockKernel->initialize();
EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired());
kernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false;
kernel.mockKernel->initialize();
EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired());
localHwInfo.capabilityTable.ftrRenderCompressedBuffers = true;
kernel.mockKernel->initialize();
EXPECT_TRUE(kernel.mockKernel->isAuxTranslationRequired());
}

View File

@@ -37,6 +37,8 @@ namespace OCLRT {
////////////////////////////////////////////////////////////////////////////////
class MockKernel : public Kernel {
public:
using Kernel::auxTranslationRequired;
struct BlockPatchValues {
uint64_t offset;
uint32_t size;
@@ -94,10 +96,6 @@ class MockKernel : public Kernel {
Kernel::crossThreadData = nullptr;
}
if (Kernel::pSshLocal == mockSshLocal.data()) {
Kernel::pSshLocal = nullptr;
}
if (kernelInfoAllocated) {
delete kernelInfoAllocated->heapInfo.pKernelHeader;
delete kernelInfoAllocated->patchInfo.executionEnvironment;
@@ -148,9 +146,6 @@ class MockKernel : public Kernel {
uint32_t getPatchedArgumentsNum() const { return patchedArgumentsNum; }
cl_int initialize() {
return Kernel::initialize();
}
bool isPatched() const override;
bool canTransformImages() const override;
@@ -177,25 +172,16 @@ class MockKernel : public Kernel {
}
void setSshLocal(const void *sshPattern, uint32_t newSshSize) {
if ((Kernel::pSshLocal != nullptr) && (Kernel::pSshLocal != mockSshLocal.data())) {
delete[] Kernel::pSshLocal;
Kernel::pSshLocal = nullptr;
Kernel::sshLocalSize = 0;
}
if (sshPattern && (newSshSize > 0)) {
mockSshLocal.clear();
mockSshLocal.insert(mockSshLocal.begin(), (char *)sshPattern, ((char *)sshPattern) + newSshSize);
} else {
mockSshLocal.resize(newSshSize, 0);
}
sshLocalSize = newSshSize;
if (newSshSize == 0) {
return;
pSshLocal.reset(nullptr);
} else {
pSshLocal = std::make_unique<char[]>(newSshSize);
if (sshPattern) {
memcpy_s(pSshLocal.get(), newSshSize, sshPattern, newSshSize);
}
}
Kernel::pSshLocal = mockSshLocal.data();
Kernel::sshLocalSize = static_cast<uint32_t>(mockSshLocal.size());
}
void setPrivateSurface(GraphicsAllocation *gfxAllocation, uint32_t size) {