Disable NonAux to Aux translation for Parent Kernel

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
Change-Id: I863608fe3652e7e777a1e841d79b5b56e7362a3f
This commit is contained in:
Dunajski, Bartosz
2018-08-20 15:17:48 +02:00
committed by sys_ocldev
parent e84ea0ffa0
commit 931b462ee1
5 changed files with 66 additions and 2 deletions

View File

@@ -106,9 +106,15 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
}
}
if (kernel->isAuxTranslationRequired()) {
if (kernel->isParentKernel) {
for (auto &buffer : buffersForAuxTranslation) {
buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
}
} else {
dispatchAuxTranslation(multiDispatchInfo, buffersForAuxTranslation, AuxTranslationDirection::NonAuxToAux);
}
}
}
enqueueHandler<commandType>(surfaces, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event);
}

View File

@@ -86,7 +86,8 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode) {
Gmm *gmm = graphicsAllocation ? graphicsAllocation->gmm : nullptr;
if (gmm && gmm->isRenderCompressed && !forceNonAuxMode) {
if (gmm && gmm->isRenderCompressed && !forceNonAuxMode &&
GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == graphicsAllocation->getAllocationType()) {
// It is expected that pitch/qpitch/baseAddress are not programmed for the Aux surface in CCS scenarios
surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E);

View File

@@ -34,6 +34,7 @@
#include "unit_tests/helpers/hw_parse.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/mocks/mock_csr.h"
#include "unit_tests/mocks/mock_device_queue.h"
#include "unit_tests/mocks/mock_buffer.h"
#include "unit_tests/mocks/mock_submissions_aggregator.h"
#include "runtime/helpers/hw_info.h"
@@ -1689,3 +1690,51 @@ HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationWhenEnqueue
EXPECT_EQ("fullCopy", kernelAfter->getKernelInfo().name);
EXPECT_TRUE(kernelAfter->isBuiltIn);
}
// Enqueues a parent kernel that requires aux translation and checks that only a
// single translation pass is dispatched (the one before the kernel). The buffers
// that took part in that pass are expected to end up with the plain BUFFER
// allocation type, while the buffer that was left out keeps BUFFER_COMPRESSED.
HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueAuxKernelTests, givenParentKernelWhenAuxTranslationIsRequiredThenDontTranslateFromNonAuxToAux) {
    // The whole scenario is skipped unless the device reports CL version 20 or higher.
    if (pDevice->getSupportedClVersion() < 20) {
        return;
    }

    using AllocationType = GraphicsAllocation::AllocationType;

    MyCmdQ<FamilyType> cmdQ(context, pDevice);
    size_t gws[3] = {1, 0, 0};

    MockBuffer buffer0, buffer1, buffer2;
    cl_mem clMem0 = &buffer0;
    cl_mem clMem1 = &buffer1;
    cl_mem clMem2 = &buffer2;

    // Every buffer starts out marked as compressed.
    MockBuffer *allBuffers[] = {&buffer0, &buffer1, &buffer2};
    for (MockBuffer *mockBuffer : allBuffers) {
        mockBuffer->getGraphicsAllocation()->setAllocationType(AllocationType::BUFFER_COMPRESSED);
    }

    cl_queue_properties queueProperties = {};
    auto mockDevQueue = std::make_unique<MockDeviceQueueHw<FamilyType>>(context, pDevice, queueProperties);
    context->setDefaultDeviceQueue(mockDevQueue.get());

    std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(*context));
    parentKernel->auxTranslationRequired = true;

    // Three buffer arguments, each with a single patch-info slot.
    auto &argInfos = parentKernel->mockKernelInfo->kernelArgInfo;
    argInfos.resize(3);
    for (auto &argInfo : argInfos) {
        argInfo.kernelArgPatchInfoVector.resize(1);
    }
    parentKernel->initialize();

    // Args 0 and 2 are accessed statelessly, arg 1 is purely stateful.
    argInfos.at(0).pureStatefulBufferAccess = false;
    argInfos.at(1).pureStatefulBufferAccess = true;
    argInfos.at(2).pureStatefulBufferAccess = false;

    parentKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem0); // stateless + BUFFER_COMPRESSED: expected in the translation set
    parentKernel->setArgBuffer(1, sizeof(cl_mem *), &clMem1); // stateful + BUFFER_COMPRESSED: expected to be left out
    parentKernel->setArgBuffer(2, sizeof(cl_mem *), &clMem2); // stateless + BUFFER_COMPRESSED: expected in the translation set

    cmdQ.enqueueKernel(parentKernel.get(), 1, nullptr, gws, nullptr, 0, nullptr, nullptr);

    // Exactly one translation dispatch is recorded: the one issued before the kernel.
    EXPECT_EQ(1u, cmdQ.dispatchAuxTranslationInputs.size());
    auto &dispatchedBuffers = std::get<BuffersForAuxTranslation>(cmdQ.dispatchAuxTranslationInputs.at(0));
    EXPECT_EQ(2u, dispatchedBuffers.size());

    EXPECT_NE(dispatchedBuffers.end(), dispatchedBuffers.find(&buffer0));
    EXPECT_EQ(dispatchedBuffers.end(), dispatchedBuffers.find(&buffer1));
    EXPECT_NE(dispatchedBuffers.end(), dispatchedBuffers.find(&buffer2));

    // Translated buffers are re-typed to BUFFER; the untouched one stays compressed.
    EXPECT_EQ(AllocationType::BUFFER, buffer0.getGraphicsAllocation()->getAllocationType());
    EXPECT_EQ(AllocationType::BUFFER_COMPRESSED, buffer1.getGraphicsAllocation()->getAllocationType());
    EXPECT_EQ(AllocationType::BUFFER, buffer2.getGraphicsAllocation()->getAllocationType());
}

View File

@@ -1219,6 +1219,7 @@ HWTEST_F(BufferSetSurfaceTests, givenRenderCompressedGmmResourceWhenSurfaceState
auto retVal = CL_SUCCESS;
std::unique_ptr<Buffer> buffer(Buffer::create(&context, CL_MEM_READ_WRITE, 1, nullptr, retVal));
buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
auto gmm = new Gmm(nullptr, 1, false);
buffer->getGraphicsAllocation()->gmm = gmm;
gmm->isRenderCompressed = true;
@@ -1228,6 +1229,10 @@ HWTEST_F(BufferSetSurfaceTests, givenRenderCompressedGmmResourceWhenSurfaceState
EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress());
EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E == surfaceState.getAuxiliarySurfaceMode());
EXPECT_TRUE(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT == surfaceState.getCoherencyType());
buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
buffer->setArgStateful(&surfaceState, false);
EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode());
}
HWTEST_F(BufferSetSurfaceTests, givenNonRenderCompressedGmmResourceWhenSurfaceStateIsProgrammedThenDontSetAuxParams) {

View File

@@ -308,6 +308,7 @@ class MockKernelWithInternals {
class MockParentKernel : public Kernel {
public:
using Kernel::auxTranslationRequired;
using Kernel::patchBlocksCurbeWithConstantValues;
static MockParentKernel *create(Context &context, bool addChildSimdSize = false, bool addChildGlobalMemory = false, bool addChildConstantMemory = false, bool addPrintfForParent = true, bool addPrintfForBlock = true) {
Device &device = *context.getDevice(0);
@@ -387,6 +388,7 @@ class MockParentKernel : public Kernel {
parent->crossThreadData = new char[crossThreadSize];
memset(parent->crossThreadData, 0, crossThreadSize);
parent->crossThreadDataSize = crossThreadSize;
parent->mockKernelInfo = info;
KernelInfo *infoBlock = new KernelInfo();
SPatchAllocateStatelessDefaultDeviceQueueSurface *allocateDeviceQueueBlock = new SPatchAllocateStatelessDefaultDeviceQueueSurface;
@@ -537,6 +539,7 @@ class MockParentKernel : public Kernel {
}
MockProgram *mockProgram;
KernelInfo *mockKernelInfo = nullptr;
};
class MockSchedulerKernel : public SchedulerKernel {