mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
Aux translation [1/n]
- Mark Kernel for aux translation - Initial implementation of dispatchAuxTranslation for future use Change-Id: Ifca1c9a893876eecc5678cdc4f564b2bfcae959a
This commit is contained in:
committed by
sys_ocldev
parent
72740fc048
commit
ec6f0f9f86
@@ -415,6 +415,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
|
||||
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType){};
|
||||
|
||||
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo) {}
|
||||
|
||||
Context *context;
|
||||
Device *device;
|
||||
|
||||
|
||||
@@ -83,6 +83,10 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
||||
if (DebugManager.flags.ForceDispatchScheduler.get()) {
|
||||
forceDispatchScheduler(multiDispatchInfo);
|
||||
} else {
|
||||
if (kernel->isAuxTranslationRequired()) {
|
||||
dispatchAuxTranslation(multiDispatchInfo);
|
||||
}
|
||||
|
||||
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::WalkerSplit> builder;
|
||||
builder.setDispatchGeometry(workDim, workItems, localWorkSizesIn, globalOffsets);
|
||||
@@ -96,6 +100,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (kernel->isAuxTranslationRequired()) {
|
||||
dispatchAuxTranslation(multiDispatchInfo);
|
||||
}
|
||||
}
|
||||
|
||||
enqueueHandler<commandType>(surfaces, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event);
|
||||
|
||||
@@ -98,7 +98,6 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, const Devic
|
||||
kernelInfo(kernelInfoArg),
|
||||
numberOfBindingTableStates(0),
|
||||
localBindingTableOffset(0),
|
||||
pSshLocal(nullptr),
|
||||
sshLocalSize(0),
|
||||
crossThreadData(nullptr),
|
||||
crossThreadDataSize(0),
|
||||
@@ -111,9 +110,6 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, const Devic
|
||||
}
|
||||
|
||||
Kernel::~Kernel() {
|
||||
delete[] pSshLocal;
|
||||
pSshLocal = nullptr;
|
||||
|
||||
delete[] crossThreadData;
|
||||
crossThreadData = nullptr;
|
||||
crossThreadDataSize = 0;
|
||||
@@ -245,10 +241,10 @@ cl_int Kernel::initialize() {
|
||||
: 0;
|
||||
|
||||
if (sshLocalSize) {
|
||||
pSshLocal = new char[sshLocalSize];
|
||||
pSshLocal = std::make_unique<char[]>(sshLocalSize);
|
||||
|
||||
// copy the ssh into our local copy
|
||||
memcpy_s(pSshLocal, sshLocalSize, heapInfo.pSsh, sshLocalSize);
|
||||
memcpy_s(pSshLocal.get(), sshLocalSize, heapInfo.pSsh, sshLocalSize);
|
||||
}
|
||||
numberOfBindingTableStates = (patchInfo.bindingTableState != nullptr) ? patchInfo.bindingTableState->Count : 0;
|
||||
localBindingTableOffset = (patchInfo.bindingTableState != nullptr) ? patchInfo.bindingTableState->Offset : 0;
|
||||
@@ -334,6 +330,8 @@ cl_int Kernel::initialize() {
|
||||
} else if ((argInfo.typeStr.find("*") != std::string::npos) || argInfo.isBuffer) {
|
||||
kernelArgHandlers[i] = &Kernel::setArgBuffer;
|
||||
kernelArguments[i].type = BUFFER_OBJ;
|
||||
this->auxTranslationRequired |= !kernelInfo.kernelArgInfo[i].pureStatefulBufferAccess &&
|
||||
getDevice().getHardwareInfo().capabilityTable.ftrRenderCompressedBuffers;
|
||||
} else if (argInfo.isImage) {
|
||||
kernelArgHandlers[i] = &Kernel::setArgImage;
|
||||
kernelArguments[i].type = IMAGE_OBJ;
|
||||
@@ -734,7 +732,7 @@ void Kernel::setStartOffset(uint32_t offset) {
|
||||
|
||||
const void *Kernel::getSurfaceStateHeap() const {
|
||||
return kernelInfo.usesSsh
|
||||
? pSshLocal
|
||||
? pSshLocal.get()
|
||||
: nullptr;
|
||||
}
|
||||
|
||||
@@ -761,8 +759,7 @@ size_t Kernel::getNumberOfBindingTableStates() const {
|
||||
}
|
||||
|
||||
void Kernel::resizeSurfaceStateHeap(void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset) {
|
||||
delete[] pSshLocal;
|
||||
pSshLocal = reinterpret_cast<char *>(pNewSsh);
|
||||
pSshLocal.reset(reinterpret_cast<char *>(pNewSsh));
|
||||
sshLocalSize = static_cast<uint32_t>(newSshSize);
|
||||
numberOfBindingTableStates = newBindingTableCount;
|
||||
localBindingTableOffset = newBindingTableOffset;
|
||||
|
||||
@@ -115,6 +115,8 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
return kernelArg == BUFFER_OBJ || kernelArg == IMAGE_OBJ || kernelArg == PIPE_OBJ;
|
||||
}
|
||||
|
||||
bool isAuxTranslationRequired() const { return auxTranslationRequired; }
|
||||
|
||||
char *getCrossThreadData() const {
|
||||
return crossThreadData;
|
||||
}
|
||||
@@ -475,7 +477,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
|
||||
size_t numberOfBindingTableStates;
|
||||
size_t localBindingTableOffset;
|
||||
char *pSshLocal;
|
||||
std::unique_ptr<char[]> pSshLocal;
|
||||
uint32_t sshLocalSize;
|
||||
|
||||
char *crossThreadData;
|
||||
@@ -487,6 +489,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
GraphicsAllocation *kernelReflectionSurface;
|
||||
|
||||
bool usingSharedObjArgs;
|
||||
bool auxTranslationRequired = false;
|
||||
uint32_t patchedArgumentsNum = 0;
|
||||
uint32_t startOffset = 0;
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
|
||||
#include "runtime/built_ins/built_ins.h"
|
||||
#include "runtime/built_ins/builtins_dispatch_builder.h"
|
||||
#include "runtime/command_queue/command_queue_hw.h"
|
||||
#include "reg_configs_common.h"
|
||||
#include "runtime/helpers/preamble.h"
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
@@ -1567,3 +1568,30 @@ HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsD
|
||||
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_FALSE(mockCsr->passedDispatchFlags.mediaSamplerRequired);
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueKernelTest, givenKernelWithRequiredAuxTranslationWhenEnqueuedThenGuardKernelWithAuxTranslations) {
|
||||
class MyCmdQ : public CommandQueueHw<FamilyType> {
|
||||
public:
|
||||
MyCmdQ(Context *context, Device *device) : CommandQueueHw<FamilyType>(context, device, nullptr) {}
|
||||
void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo) override {
|
||||
CommandQueueHw<FamilyType>::dispatchAuxTranslation(multiDispatchInfo);
|
||||
multiDispatchInfoSizes.push_back(multiDispatchInfo.size());
|
||||
}
|
||||
|
||||
std::vector<size_t> multiDispatchInfoSizes;
|
||||
};
|
||||
|
||||
MockKernelWithInternals mockKernel(*pDevice, context);
|
||||
MyCmdQ cmdQ(context, pDevice);
|
||||
size_t gws[3] = {1, 0, 0};
|
||||
|
||||
mockKernel.mockKernel->auxTranslationRequired = true;
|
||||
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(2u, cmdQ.multiDispatchInfoSizes.size());
|
||||
EXPECT_EQ(0u, cmdQ.multiDispatchInfoSizes.at(0)); // before kernel
|
||||
EXPECT_EQ(1u, cmdQ.multiDispatchInfoSizes.at(1)); // after kernel
|
||||
|
||||
mockKernel.mockKernel->auxTranslationRequired = false;
|
||||
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(2u, cmdQ.multiDispatchInfoSizes.size()); // not changed
|
||||
}
|
||||
|
||||
@@ -2211,3 +2211,25 @@ TEST(KernelTest, givenKernelWithKernelInfoWith64bitPointerSizeThenReport64bit) {
|
||||
|
||||
EXPECT_FALSE(kernel->is32Bit());
|
||||
}
|
||||
|
||||
TEST(KernelTest, givenFtrRenderCompressedBuffersWhenInitializingArgsWithNonStatefulAccessThenMarkKernelForAuxTranslation) {
|
||||
HardwareInfo localHwInfo = *platformDevices[0];
|
||||
|
||||
auto device = std::unique_ptr<Device>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&localHwInfo));
|
||||
MockKernelWithInternals kernel(*device);
|
||||
kernel.kernelInfo.kernelArgInfo.resize(1);
|
||||
kernel.kernelInfo.kernelArgInfo.at(0).typeStr = "char *";
|
||||
|
||||
localHwInfo.capabilityTable.ftrRenderCompressedBuffers = false;
|
||||
kernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = true;
|
||||
kernel.mockKernel->initialize();
|
||||
EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired());
|
||||
|
||||
kernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false;
|
||||
kernel.mockKernel->initialize();
|
||||
EXPECT_FALSE(kernel.mockKernel->isAuxTranslationRequired());
|
||||
|
||||
localHwInfo.capabilityTable.ftrRenderCompressedBuffers = true;
|
||||
kernel.mockKernel->initialize();
|
||||
EXPECT_TRUE(kernel.mockKernel->isAuxTranslationRequired());
|
||||
}
|
||||
|
||||
@@ -37,6 +37,8 @@ namespace OCLRT {
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
class MockKernel : public Kernel {
|
||||
public:
|
||||
using Kernel::auxTranslationRequired;
|
||||
|
||||
struct BlockPatchValues {
|
||||
uint64_t offset;
|
||||
uint32_t size;
|
||||
@@ -94,10 +96,6 @@ class MockKernel : public Kernel {
|
||||
Kernel::crossThreadData = nullptr;
|
||||
}
|
||||
|
||||
if (Kernel::pSshLocal == mockSshLocal.data()) {
|
||||
Kernel::pSshLocal = nullptr;
|
||||
}
|
||||
|
||||
if (kernelInfoAllocated) {
|
||||
delete kernelInfoAllocated->heapInfo.pKernelHeader;
|
||||
delete kernelInfoAllocated->patchInfo.executionEnvironment;
|
||||
@@ -148,9 +146,6 @@ class MockKernel : public Kernel {
|
||||
|
||||
uint32_t getPatchedArgumentsNum() const { return patchedArgumentsNum; }
|
||||
|
||||
cl_int initialize() {
|
||||
return Kernel::initialize();
|
||||
}
|
||||
bool isPatched() const override;
|
||||
|
||||
bool canTransformImages() const override;
|
||||
@@ -177,25 +172,16 @@ class MockKernel : public Kernel {
|
||||
}
|
||||
|
||||
void setSshLocal(const void *sshPattern, uint32_t newSshSize) {
|
||||
if ((Kernel::pSshLocal != nullptr) && (Kernel::pSshLocal != mockSshLocal.data())) {
|
||||
delete[] Kernel::pSshLocal;
|
||||
Kernel::pSshLocal = nullptr;
|
||||
Kernel::sshLocalSize = 0;
|
||||
}
|
||||
|
||||
if (sshPattern && (newSshSize > 0)) {
|
||||
mockSshLocal.clear();
|
||||
mockSshLocal.insert(mockSshLocal.begin(), (char *)sshPattern, ((char *)sshPattern) + newSshSize);
|
||||
} else {
|
||||
mockSshLocal.resize(newSshSize, 0);
|
||||
}
|
||||
sshLocalSize = newSshSize;
|
||||
|
||||
if (newSshSize == 0) {
|
||||
return;
|
||||
pSshLocal.reset(nullptr);
|
||||
} else {
|
||||
pSshLocal = std::make_unique<char[]>(newSshSize);
|
||||
if (sshPattern) {
|
||||
memcpy_s(pSshLocal.get(), newSshSize, sshPattern, newSshSize);
|
||||
}
|
||||
}
|
||||
|
||||
Kernel::pSshLocal = mockSshLocal.data();
|
||||
Kernel::sshLocalSize = static_cast<uint32_t>(mockSshLocal.size());
|
||||
}
|
||||
|
||||
void setPrivateSurface(GraphicsAllocation *gfxAllocation, uint32_t size) {
|
||||
|
||||
Reference in New Issue
Block a user