Provide compression performance hints during resource allocation

Change-Id: I1f8076b9c7cc51e16c25a554673a19c4a0c87d63
Resolves: NEO-3243
Signed-off-by: Andrzej Swierczynski <andrzej.swierczynski@intel.com>
This commit is contained in:
Andrzej Swierczynski 2019-06-28 18:22:38 +02:00 committed by sys_ocldev
parent 6abb6523c2
commit 2e2d5d53b4
7 changed files with 275 additions and 4 deletions

View File

@ -56,8 +56,11 @@ const char *const DriverDiagnostics::hintFormat[] = {
"Performance hint: Kernel %s register pressure is too high, spill fills will be generated, additional surface needs to be allocated of size %u, consider simplifying your kernel.", //REGISTER_PRESSURE_TOO_HIGH "Performance hint: Kernel %s register pressure is too high, spill fills will be generated, additional surface needs to be allocated of size %u, consider simplifying your kernel.", //REGISTER_PRESSURE_TOO_HIGH
"Performance hint: Kernel %s private memory usage is too high and exhausts register space, additional surface needs to be allocated of size %u, consider reducing amount of private memory used, avoid using private memory arrays.", //PRIVATE_MEMORY_USAGE_TOO_HIGH "Performance hint: Kernel %s private memory usage is too high and exhausts register space, additional surface needs to be allocated of size %u, consider reducing amount of private memory used, avoid using private memory arrays.", //PRIVATE_MEMORY_USAGE_TOO_HIGH
"Performance hint: Kernel %s submission requires coherency with CPU; this will impact performance.", //KERNEL_REQUIRES_COHERENCY "Performance hint: Kernel %s submission requires coherency with CPU; this will impact performance.", //KERNEL_REQUIRES_COHERENCY
"Performance hint: Kernel %s requires aux translation on argument [%u] = \"%s\"" //KERNEL_ARGUMENT_AUX_TRANSLATION "Performance hint: Kernel %s requires aux translation on argument [%u] = \"%s\"", //KERNEL_ARGUMENT_AUX_TRANSLATION
}; "Performance hint: Buffer %p will use compressed memory.", //BUFFER_IS_COMPRESSED
"Performance hint: Buffer %p will not use compressed memory.", //BUFFER_IS_NOT_COMPRESSED
"Performance hint: Image %p will use compressed memory.", //IMAGE_IS_COMPRESSED
"Performance hint: Image %p will not use compressed memory."}; //IMAGE_IS_NOT_COMPRESSED
PerformanceHints DriverDiagnostics::obtainHintForTransferOperation(cl_command_type commandType, bool transferRequired) { PerformanceHints DriverDiagnostics::obtainHintForTransferOperation(cl_command_type commandType, bool transferRequired) {
PerformanceHints hint; PerformanceHints hint;

View File

@ -47,7 +47,11 @@ enum PerformanceHints {
REGISTER_PRESSURE_TOO_HIGH, REGISTER_PRESSURE_TOO_HIGH,
PRIVATE_MEMORY_USAGE_TOO_HIGH, PRIVATE_MEMORY_USAGE_TOO_HIGH,
KERNEL_REQUIRES_COHERENCY, KERNEL_REQUIRES_COHERENCY,
KERNEL_ARGUMENT_AUX_TRANSLATION KERNEL_ARGUMENT_AUX_TRANSLATION,
BUFFER_IS_COMPRESSED,
BUFFER_IS_NOT_COMPRESSED,
IMAGE_IS_COMPRESSED,
IMAGE_IS_NOT_COMPRESSED
}; };
class DriverDiagnostics { class DriverDiagnostics {

View File

@ -276,6 +276,9 @@ Buffer *Buffer::create(Context *context,
mapAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr); mapAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr);
} }
} }
Buffer::provideCompressionHint(allocationType, context, pBuffer);
pBuffer->mapAllocation = mapAllocation; pBuffer->mapAllocation = mapAllocation;
pBuffer->setHostPtrMinSize(size); pBuffer->setHostPtrMinSize(size);
@ -563,4 +566,16 @@ void Buffer::setSurfaceState(const Device *device,
buffer->graphicsAllocation = nullptr; buffer->graphicsAllocation = nullptr;
delete buffer; delete buffer;
} }
// Reports to the context whether a buffer ended up in render-compressed memory.
//
// Nothing is reported unless the context is providing performance hints and
// HwHelper::renderCompressedBuffersSupported() accepts the hardware info of the
// context's device 0.
//
// allocationType - allocation type selected for the buffer; only
//                  BUFFER_COMPRESSED produces BUFFER_IS_COMPRESSED, every other
//                  type produces BUFFER_IS_NOT_COMPRESSED.
// context        - context that receives the hint.
// buffer         - buffer pointer forwarded as the hint argument.
void Buffer::provideCompressionHint(GraphicsAllocation::AllocationType allocationType,
                                    Context *context,
                                    Buffer *buffer) {
    if (!context->isProvidingPerformanceHints()) {
        return;
    }
    if (!HwHelper::renderCompressedBuffersSupported(context->getDevice(0)->getHardwareInfo())) {
        return;
    }

    const bool usesCompressedMemory = (GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == allocationType);
    const PerformanceHints compressionHint = usesCompressedMemory ? BUFFER_IS_COMPRESSED : BUFFER_IS_NOT_COMPRESSED;

    context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, compressionHint, buffer);
}
} // namespace NEO } // namespace NEO

View File

@ -100,6 +100,10 @@ class Buffer : public MemObj {
GraphicsAllocation *gfxAlloc = nullptr, GraphicsAllocation *gfxAlloc = nullptr,
cl_mem_flags flags = 0); cl_mem_flags flags = 0);
static void provideCompressionHint(GraphicsAllocation::AllocationType allocationType,
Context *context,
Buffer *buffer);
BufferCreatFunc createFunction = nullptr; BufferCreatFunc createFunction = nullptr;
bool isSubBuffer(); bool isSubBuffer();
bool isValidSubBufferOffset(size_t offset); bool isValidSubBufferOffset(size_t offset);

View File

@ -222,6 +222,7 @@ Image *Image::create(Context *context,
} }
memory = parentBuffer->getGraphicsAllocation(); memory = parentBuffer->getGraphicsAllocation();
// Image from buffer - we never allocate memory, we use what buffer provides // Image from buffer - we never allocate memory, we use what buffer provides
zeroCopy = true; zeroCopy = true;
hostPtr = parentBuffer->getHostPtr(); hostPtr = parentBuffer->getHostPtr();
@ -313,6 +314,16 @@ Image *Image::create(Context *context,
image = createImageHw(context, properties, imgInfo.size, hostPtrToSet, surfaceFormat->OCLImageFormat, image = createImageHw(context, properties, imgInfo.size, hostPtrToSet, surfaceFormat->OCLImageFormat,
imageDescriptor, zeroCopy, memory, false, isTilingAllowed, 0, 0, surfaceFormat); imageDescriptor, zeroCopy, memory, false, isTilingAllowed, 0, 0, surfaceFormat);
if (context->isProvidingPerformanceHints() && HwHelper::renderCompressedImagesSupported(context->getDevice(0)->getHardwareInfo())) {
if (memory->getDefaultGmm()) {
if (memory->getDefaultGmm()->isRenderCompressed) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, IMAGE_IS_COMPRESSED, image);
} else {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, IMAGE_IS_NOT_COMPRESSED, image);
}
}
}
if (imageDesc->image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY && imageDesc->image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY) { if (imageDesc->image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY && imageDesc->image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY) {
image->imageDesc.image_array_size = 0; image->imageDesc.image_array_size = 0;
} }

View File

@ -7,7 +7,9 @@
#include "driver_diagnostics_tests.h" #include "driver_diagnostics_tests.h"
#include "runtime/mem_obj/mem_obj_helper.h"
#include "unit_tests/helpers/debug_manager_state_restore.h" #include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/mocks/mock_gmm.h"
#include <tuple> #include <tuple>
@ -455,6 +457,238 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallF
EXPECT_TRUE(containsHint(expectedHint, userData)); EXPECT_TRUE(containsHint(expectedHint, userData));
} }
// Creates a buffer on a device whose hardware info has render-compressed
// buffers switched on and checks that BUFFER_IS_COMPRESSED is reported exactly
// when the build is not 32-bit and HwHelper confirms buffer compression support.
TEST_F(PerformanceHintTest, given64bitCompressedBufferWhenItsCreatedThenProperPerformanceHintIsProvided) {
    cl_int retVal;

    // Start from the fixture context's device 0 hardware info and force the capability on.
    HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo();
    hwInfo.capabilityTable.ftrRenderCompressedBuffers = true;
    auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
    cl_device_id deviceId = static_cast<cl_device_id>(device.get());

    // Dedicated context with diagnostics enabled at every level; hints land in userData.
    cl_context_properties diagnosticsProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0};
    auto diagnosticsContext = std::unique_ptr<MockContext>(Context::create<NEO::MockContext>(diagnosticsProperties, DeviceVector(&deviceId, 1), callbackFunction, static_cast<void *>(userData), retVal));
    diagnosticsContext->isSharedContext = false;

    // NOTE(review): 1 << 21 is a raw flag bit - presumably a compression-related
    // memory flag; confirm and prefer the named constant if one exists.
    const MemoryProperties properties(1 << 21);
    const size_t bufferSize = 8192u;
    auto buffer = std::unique_ptr<Buffer>(Buffer::create(diagnosticsContext.get(), properties, bufferSize, static_cast<void *>(nullptr), retVal));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[BUFFER_IS_COMPRESSED], buffer.get());

    const bool hintExpected = !is32bit && HwHelper::renderCompressedBuffersSupported(hwInfo);
    EXPECT_EQ(hintExpected, containsHint(expectedHint, userData));
}
// Verifies that the BUFFER_IS_NOT_COMPRESSED hint text for the created buffer
// ends up in userData. The buffer is created with CL_MEM_READ_WRITE and size 0
// on a device whose hardware info has ftrRenderCompressedBuffers forced to true.
TEST_F(PerformanceHintTest, givenUncompressedBufferWhenItsCreatedThenProperPerformanceHintIsProvided) {
cl_int retVal;
// Copy the hardware info of the fixture context's device 0 and enable buffer compression in the copy.
HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo();
hwInfo.capabilityTable.ftrRenderCompressedBuffers = true;
auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
cl_device_id deviceId = static_cast<cl_device_id>(device.get());
const MemoryProperties properties(CL_MEM_READ_WRITE);
size_t size = 0u;
// Diagnostics are enabled at all levels; from here on `context` is this local
// context and hides the fixture member of the same name.
cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0};
auto context = std::unique_ptr<MockContext>(Context::create<NEO::MockContext>(validProperties, DeviceVector(&deviceId, 1), callbackFunction, static_cast<void *>(userData), retVal));
std::unique_ptr<Buffer> buffer;
// Stays true (and `buffer` stays empty) when the context has no memory manager.
bool isCompressed = true;
if (context->getMemoryManager()) {
// NOTE(review): this predicate presumably mirrors the condition under which
// Buffer::create would pick a compressed allocation - confirm it is kept in
// sync with the production code.
isCompressed = MemObjHelper::isSuitableForRenderCompression(
HwHelper::renderCompressedBuffersSupported(hwInfo),
properties, context->peekContextType(),
HwHelper::get(hwInfo.platform.eRenderCoreFamily).obtainRenderBufferCompressionPreference(size)) &&
!is32bit && !context->isSharedContext &&
(!isValueSet(properties.flags, CL_MEM_USE_HOST_PTR) || context->getMemoryManager()->isLocalMemorySupported()) &&
!isValueSet(properties.flags, CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL);
buffer = std::unique_ptr<Buffer>(Buffer::create(context.get(), properties, size, static_cast<void *>(NULL), retVal));
}
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[BUFFER_IS_NOT_COMPRESSED], buffer.get());
// When the predicate above says "compressed", the test requests the hint itself
// with the plain BUFFER allocation type, which provideCompressionHint maps to
// BUFFER_IS_NOT_COMPRESSED.
if (isCompressed) {
Buffer::provideCompressionHint(GraphicsAllocation::AllocationType::BUFFER, context.get(), buffer.get());
}
EXPECT_TRUE(containsHint(expectedHint, userData));
}
// Builds a 1D image on top of a mock buffer whose allocation carries a Gmm
// marked as render compressed, then checks that IMAGE_IS_COMPRESSED is reported
// exactly when HwHelper confirms image compression support for the hardware info.
TEST_F(PerformanceHintTest, givenCompressedImageWhenItsCreatedThenProperPerformanceHintIsProvided) {
    // Device with image compression forced on in its hardware info.
    HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo();
    hwInfo.capabilityTable.ftrRenderCompressedImages = true;
    auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
    cl_device_id deviceId = static_cast<cl_device_id>(device.get());

    // Context creation reports its status through the fixture's retVal.
    cl_context_properties diagnosticsProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0};
    auto diagnosticsContext = std::unique_ptr<MockContext>(Context::create<NEO::MockContext>(diagnosticsProperties, DeviceVector(&deviceId, 1), callbackFunction, static_cast<void *>(userData), retVal));

    const size_t width = 5;
    const size_t height = 3;
    const size_t depth = 2;
    auto const elementSize = 4;
    char *hostPtr = static_cast<char *>(alignedMalloc(width * height * depth * elementSize * 2, 64));

    // Parent buffer whose graphics allocation gets a Gmm flagged as compressed.
    auto mockBuffer = std::unique_ptr<MockBuffer>(new MockBuffer());
    StorageInfo storageInfo;
    size_t gmmSize = 4;
    auto gmm = std::unique_ptr<Gmm>(new Gmm(static_cast<const void *>(nullptr), gmmSize, false, true, true, storageInfo));
    gmm->isRenderCompressed = true;
    mockBuffer->getGraphicsAllocation()->setDefaultGmm(gmm.get());

    cl_image_format imageFormat;
    imageFormat.image_channel_order = CL_RGBA;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;

    cl_image_desc imageDesc;
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
    imageDesc.image_width = width;
    imageDesc.image_height = 0;
    imageDesc.image_depth = 0;
    imageDesc.image_array_size = 0;
    imageDesc.image_row_pitch = 0;
    imageDesc.image_slice_pitch = 0;
    imageDesc.num_mip_levels = 0;
    imageDesc.num_samples = 0;
    imageDesc.mem_object = mockBuffer.get();

    const cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat);

    // Image creation status goes to a separate local, not to the fixture's retVal.
    cl_int imageRetVal = CL_SUCCESS;
    auto image = std::unique_ptr<Image>(Image::create(diagnosticsContext.get(), flags, surfaceFormat, &imageDesc, hostPtr, imageRetVal));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[IMAGE_IS_COMPRESSED], image.get());
    alignedFree(hostPtr);

    const bool hintExpected = HwHelper::renderCompressedImagesSupported(hwInfo);
    EXPECT_EQ(hintExpected, containsHint(expectedHint, userData));
}
// Builds a 1D image on top of a mock buffer without assigning a Gmm to the
// buffer's allocation and checks that neither image compression hint is reported.
TEST_F(PerformanceHintTest, givenImageWithNoGmmWhenItsCreatedThenNoPerformanceHintIsProvided) {
    // Device with image compression forced on in its hardware info.
    HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo();
    hwInfo.capabilityTable.ftrRenderCompressedImages = true;
    auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
    cl_device_id deviceId = static_cast<cl_device_id>(device.get());

    // Context creation reports its status through the fixture's retVal.
    cl_context_properties diagnosticsProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0};
    auto diagnosticsContext = std::unique_ptr<MockContext>(Context::create<NEO::MockContext>(diagnosticsProperties, DeviceVector(&deviceId, 1), callbackFunction, static_cast<void *>(userData), retVal));

    const size_t width = 5;
    const size_t height = 3;
    const size_t depth = 2;
    auto const elementSize = 4;
    char *hostPtr = static_cast<char *>(alignedMalloc(width * height * depth * elementSize * 2, 64));

    // Parent buffer is left as constructed: this test never sets a Gmm on it.
    auto mockBuffer = std::unique_ptr<MockBuffer>(new MockBuffer());

    cl_image_format imageFormat;
    imageFormat.image_channel_order = CL_RGBA;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;

    cl_image_desc imageDesc;
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
    imageDesc.image_width = width;
    imageDesc.image_height = 0;
    imageDesc.image_depth = 0;
    imageDesc.image_array_size = 0;
    imageDesc.image_row_pitch = 0;
    imageDesc.image_slice_pitch = 0;
    imageDesc.num_mip_levels = 0;
    imageDesc.num_samples = 0;
    imageDesc.mem_object = mockBuffer.get();

    const cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat);

    // Image creation status goes to a separate local, not to the fixture's retVal.
    cl_int imageRetVal = CL_SUCCESS;
    auto image = std::unique_ptr<Image>(Image::create(diagnosticsContext.get(), flags, surfaceFormat, &imageDesc, hostPtr, imageRetVal));

    // Neither the "compressed" nor the "not compressed" message may be present.
    const PerformanceHints unexpectedHints[] = {IMAGE_IS_COMPRESSED, IMAGE_IS_NOT_COMPRESSED};
    for (const auto unexpectedHint : unexpectedHints) {
        snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[unexpectedHint], image.get());
        EXPECT_FALSE(containsHint(expectedHint, userData));
    }

    alignedFree(hostPtr);
}
// Builds a 1D image on top of a mock buffer whose allocation carries a Gmm
// marked as not render compressed, then checks that IMAGE_IS_NOT_COMPRESSED is
// reported exactly when HwHelper confirms image compression support.
TEST_F(PerformanceHintTest, givenUncompressedImageWhenItsCreatedThenProperPerformanceHintIsProvided) {
    // Device with image compression forced on in its hardware info.
    HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo();
    hwInfo.capabilityTable.ftrRenderCompressedImages = true;
    auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
    cl_device_id deviceId = static_cast<cl_device_id>(device.get());

    // Context creation reports its status through the fixture's retVal.
    cl_context_properties diagnosticsProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0};
    auto diagnosticsContext = std::unique_ptr<MockContext>(Context::create<NEO::MockContext>(diagnosticsProperties, DeviceVector(&deviceId, 1), callbackFunction, static_cast<void *>(userData), retVal));

    const size_t width = 5;
    const size_t height = 3;
    const size_t depth = 2;
    auto const elementSize = 4;
    char *hostPtr = static_cast<char *>(alignedMalloc(width * height * depth * elementSize * 2, 64));

    // Parent buffer whose graphics allocation gets a Gmm flagged as uncompressed.
    auto mockBuffer = std::unique_ptr<MockBuffer>(new MockBuffer());
    StorageInfo storageInfo;
    size_t gmmSize = 4;
    auto gmm = std::unique_ptr<Gmm>(new Gmm(static_cast<const void *>(nullptr), gmmSize, false, true, true, storageInfo));
    gmm->isRenderCompressed = false;
    mockBuffer->getGraphicsAllocation()->setDefaultGmm(gmm.get());

    cl_image_format imageFormat;
    imageFormat.image_channel_order = CL_RGBA;
    imageFormat.image_channel_data_type = CL_UNORM_INT8;

    cl_image_desc imageDesc;
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
    imageDesc.image_width = width;
    imageDesc.image_height = 0;
    imageDesc.image_depth = 0;
    imageDesc.image_array_size = 0;
    imageDesc.image_row_pitch = 0;
    imageDesc.image_slice_pitch = 0;
    imageDesc.num_mip_levels = 0;
    imageDesc.num_samples = 0;
    imageDesc.mem_object = mockBuffer.get();

    const cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;
    auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat);

    // Image creation status goes to a separate local, not to the fixture's retVal.
    cl_int imageRetVal = CL_SUCCESS;
    auto image = std::unique_ptr<Image>(Image::create(diagnosticsContext.get(), flags, surfaceFormat, &imageDesc, hostPtr, imageRetVal));

    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[IMAGE_IS_NOT_COMPRESSED], image.get());
    alignedFree(hostPtr);

    const bool hintExpected = HwHelper::renderCompressedImagesSupported(hwInfo);
    EXPECT_EQ(hintExpected, containsHint(expectedHint, userData));
}
TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenContextProvidesProperHint) { TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenContextProvidesProperHint) {
auto pDevice = castToObject<Device>(devices[0]); auto pDevice = castToObject<Device>(devices[0]);

View File

@ -24,7 +24,7 @@
using namespace NEO; using namespace NEO;
const int maxHintCounter = 4; const int maxHintCounter = 6;
bool containsHint(const char *providedHint, char *userData); bool containsHint(const char *providedHint, char *userData);