From 2e2d5d53b4e80d605f6d9badb845e17a1010f0a6 Mon Sep 17 00:00:00 2001 From: Andrzej Swierczynski Date: Fri, 28 Jun 2019 18:22:38 +0200 Subject: [PATCH] Provide compression performance hints during resource allocation Change-Id: I1f8076b9c7cc51e16c25a554673a19c4a0c87d63 Resolves: NEO-3243 Signed-off-by: Andrzej Swierczynski --- runtime/context/driver_diagnostics.cpp | 7 +- runtime/context/driver_diagnostics.h | 6 +- runtime/mem_obj/buffer.cpp | 15 ++ runtime/mem_obj/buffer.h | 4 + runtime/mem_obj/image.cpp | 11 + .../context/driver_diagnostics_tests.cpp | 234 ++++++++++++++++++ unit_tests/context/driver_diagnostics_tests.h | 2 +- 7 files changed, 275 insertions(+), 4 deletions(-) diff --git a/runtime/context/driver_diagnostics.cpp b/runtime/context/driver_diagnostics.cpp index 857efc1500..d8a4afdaf8 100644 --- a/runtime/context/driver_diagnostics.cpp +++ b/runtime/context/driver_diagnostics.cpp @@ -56,8 +56,11 @@ const char *const DriverDiagnostics::hintFormat[] = { "Performance hint: Kernel %s register pressure is too high, spill fills will be generated, additional surface needs to be allocated of size %u, consider simplifying your kernel.", //REGISTER_PRESSURE_TOO_HIGH "Performance hint: Kernel %s private memory usage is too high and exhausts register space, additional surface needs to be allocated of size %u, consider reducing amount of private memory used, avoid using private memory arrays.", //PRIVATE_MEMORY_USAGE_TOO_HIGH "Performance hint: Kernel %s submission requires coherency with CPU; this will impact performance.", //KERNEL_REQUIRES_COHERENCY - "Performance hint: Kernel %s requires aux translation on argument [%u] = \"%s\"" //KERNEL_ARGUMENT_AUX_TRANSLATION -}; + "Performance hint: Kernel %s requires aux translation on argument [%u] = \"%s\"", //KERNEL_ARGUMENT_AUX_TRANSLATION + "Performance hint: Buffer %p will use compressed memory.", //BUFFER_IS_COMPRESSED + "Performance hint: Buffer %p will not use compressed memory.", //BUFFER_IS_NOT_COMPRESSED + "Performance hint: Image %p will use compressed memory.", //IMAGE_IS_COMPRESSED + "Performance hint: Image %p will not use compressed memory."}; //IMAGE_IS_NOT_COMPRESSED PerformanceHints DriverDiagnostics::obtainHintForTransferOperation(cl_command_type commandType, bool transferRequired) { PerformanceHints hint; diff --git a/runtime/context/driver_diagnostics.h b/runtime/context/driver_diagnostics.h index e18a0be46a..1bbd32b1cc 100644 --- a/runtime/context/driver_diagnostics.h +++ b/runtime/context/driver_diagnostics.h @@ -47,7 +47,11 @@ enum PerformanceHints { REGISTER_PRESSURE_TOO_HIGH, PRIVATE_MEMORY_USAGE_TOO_HIGH, KERNEL_REQUIRES_COHERENCY, - KERNEL_ARGUMENT_AUX_TRANSLATION + KERNEL_ARGUMENT_AUX_TRANSLATION, + BUFFER_IS_COMPRESSED, + BUFFER_IS_NOT_COMPRESSED, + IMAGE_IS_COMPRESSED, + IMAGE_IS_NOT_COMPRESSED }; class DriverDiagnostics { diff --git a/runtime/mem_obj/buffer.cpp b/runtime/mem_obj/buffer.cpp index 30f5d43771..2f86228184 100644 --- a/runtime/mem_obj/buffer.cpp +++ b/runtime/mem_obj/buffer.cpp @@ -276,6 +276,9 @@ Buffer *Buffer::create(Context *context, mapAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr); } } + + Buffer::provideCompressionHint(allocationType, context, pBuffer); + pBuffer->mapAllocation = mapAllocation; pBuffer->setHostPtrMinSize(size); @@ -563,4 +566,16 @@ void Buffer::setSurfaceState(const Device *device, buffer->graphicsAllocation = nullptr; delete buffer; } + +void Buffer::provideCompressionHint(GraphicsAllocation::AllocationType allocationType, + Context *context, + Buffer *buffer) { + if (context->isProvidingPerformanceHints() && HwHelper::renderCompressedBuffersSupported(context->getDevice(0)->getHardwareInfo())) { + if (allocationType == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) { + context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, BUFFER_IS_COMPRESSED, buffer); + } else { + context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, BUFFER_IS_NOT_COMPRESSED, buffer); + } + } +} } // namespace NEO diff --git a/runtime/mem_obj/buffer.h b/runtime/mem_obj/buffer.h index c2fdb701a5..b89757e14b 100644 --- a/runtime/mem_obj/buffer.h +++ b/runtime/mem_obj/buffer.h @@ -100,6 +100,10 @@ class Buffer : public MemObj { GraphicsAllocation *gfxAlloc = nullptr, cl_mem_flags flags = 0); + static void provideCompressionHint(GraphicsAllocation::AllocationType allocationType, + Context *context, + Buffer *buffer); + BufferCreatFunc createFunction = nullptr; bool isSubBuffer(); bool isValidSubBufferOffset(size_t offset); diff --git a/runtime/mem_obj/image.cpp b/runtime/mem_obj/image.cpp index 947ab73d1a..242a68ef3d 100644 --- a/runtime/mem_obj/image.cpp +++ b/runtime/mem_obj/image.cpp @@ -222,6 +222,7 @@ Image *Image::create(Context *context, } memory = parentBuffer->getGraphicsAllocation(); + // Image from buffer - we never allocate memory, we use what buffer provides zeroCopy = true; hostPtr = parentBuffer->getHostPtr(); @@ -313,6 +314,16 @@ Image *Image::create(Context *context, image = createImageHw(context, properties, imgInfo.size, hostPtrToSet, surfaceFormat->OCLImageFormat, imageDescriptor, zeroCopy, memory, false, isTilingAllowed, 0, 0, surfaceFormat); + if (context->isProvidingPerformanceHints() && HwHelper::renderCompressedImagesSupported(context->getDevice(0)->getHardwareInfo())) { + if (memory->getDefaultGmm()) { + if (memory->getDefaultGmm()->isRenderCompressed) { + context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, IMAGE_IS_COMPRESSED, image); + } else { + context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, IMAGE_IS_NOT_COMPRESSED, image); + } + } + } + if (imageDesc->image_type != CL_MEM_OBJECT_IMAGE1D_ARRAY && imageDesc->image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY) { image->imageDesc.image_array_size = 0; } diff --git a/unit_tests/context/driver_diagnostics_tests.cpp b/unit_tests/context/driver_diagnostics_tests.cpp index 51c11b5add..f26021c47a 100644 --- a/unit_tests/context/driver_diagnostics_tests.cpp +++ b/unit_tests/context/driver_diagnostics_tests.cpp @@ -7,7 +7,9 @@ #include "driver_diagnostics_tests.h" +#include "runtime/mem_obj/mem_obj_helper.h" #include "unit_tests/helpers/debug_manager_state_restore.h" +#include "unit_tests/mocks/mock_gmm.h" #include @@ -455,6 +457,238 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallF EXPECT_TRUE(containsHint(expectedHint, userData)); } +TEST_F(PerformanceHintTest, given64bitCompressedBufferWhenItsCreatedThenProperPerformanceHintIsProvided) { + cl_int retVal; + HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo(); + hwInfo.capabilityTable.ftrRenderCompressedBuffers = true; + + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); + cl_device_id deviceId = static_cast(device.get()); + const MemoryProperties properties(1 << 21); + size_t size = 8192u; + + cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; + auto context = std::unique_ptr(Context::create(validProperties, DeviceVector(&deviceId, 1), callbackFunction, static_cast(userData), retVal)); + context->isSharedContext = false; + auto buffer = std::unique_ptr(Buffer::create(context.get(), properties, size, static_cast(NULL), retVal)); + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[BUFFER_IS_COMPRESSED], buffer.get()); + if (!is32bit && HwHelper::renderCompressedBuffersSupported(hwInfo)) { + EXPECT_TRUE(containsHint(expectedHint, userData)); + } else { + EXPECT_FALSE(containsHint(expectedHint, userData)); + } +} + +TEST_F(PerformanceHintTest, givenUncompressedBufferWhenItsCreatedThenProperPerformanceHintIsProvided) { + cl_int retVal; + HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo(); + hwInfo.capabilityTable.ftrRenderCompressedBuffers = true; + + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); + cl_device_id deviceId = static_cast(device.get()); + const MemoryProperties properties(CL_MEM_READ_WRITE); + size_t size = 0u; + + cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; + auto context = std::unique_ptr(Context::create(validProperties, DeviceVector(&deviceId, 1), callbackFunction, static_cast(userData), retVal)); + std::unique_ptr buffer; + bool isCompressed = true; + if (context->getMemoryManager()) { + isCompressed = MemObjHelper::isSuitableForRenderCompression( + HwHelper::renderCompressedBuffersSupported(hwInfo), + properties, context->peekContextType(), + HwHelper::get(hwInfo.platform.eRenderCoreFamily).obtainRenderBufferCompressionPreference(size)) && + !is32bit && !context->isSharedContext && + (!isValueSet(properties.flags, CL_MEM_USE_HOST_PTR) || context->getMemoryManager()->isLocalMemorySupported()) && + !isValueSet(properties.flags, CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL); + + buffer = std::unique_ptr(Buffer::create(context.get(), properties, size, static_cast(NULL), retVal)); + } + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[BUFFER_IS_NOT_COMPRESSED], buffer.get()); + + if (isCompressed) { + Buffer::provideCompressionHint(GraphicsAllocation::AllocationType::BUFFER, context.get(), buffer.get()); + } + EXPECT_TRUE(containsHint(expectedHint, userData)); +} + +TEST_F(PerformanceHintTest, givenCompressedImageWhenItsCreatedThenProperPerformanceHintIsProvided) { + HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo(); + hwInfo.capabilityTable.ftrRenderCompressedImages = true; + + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); + cl_device_id deviceId = static_cast(device.get()); + + cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; + auto context = std::unique_ptr(Context::create(validProperties, DeviceVector(&deviceId, 1), callbackFunction, static_cast(userData), retVal)); + + const size_t width = 5; + const size_t height = 3; + const size_t depth = 2; + cl_int retVal = CL_SUCCESS; + auto const elementSize = 4; + char *hostPtr = static_cast(alignedMalloc(width * height * depth * elementSize * 2, 64)); + + cl_image_format imageFormat; + cl_image_desc imageDesc; + auto mockBuffer = std::unique_ptr(new MockBuffer()); + StorageInfo info; + size_t t = 4; + auto gmm = std::unique_ptr(new Gmm(static_cast(nullptr), t, false, true, true, info)); + gmm->isRenderCompressed = true; + + mockBuffer->getGraphicsAllocation()->setDefaultGmm(gmm.get()); + cl_mem mem = mockBuffer.get(); + imageFormat.image_channel_data_type = CL_UNORM_INT8; + imageFormat.image_channel_order = CL_RGBA; + imageDesc.num_mip_levels = 0; + imageDesc.num_samples = 0; + imageDesc.mem_object = mem; + imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + imageDesc.image_width = width; + imageDesc.image_height = 0; + imageDesc.image_depth = 0; + imageDesc.image_array_size = 0; + imageDesc.image_row_pitch = 0; + imageDesc.image_slice_pitch = 0; + + cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; + auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat); + + auto image = std::unique_ptr(Image::create( + context.get(), + flags, + surfaceFormat, + &imageDesc, + hostPtr, + retVal)); + + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[IMAGE_IS_COMPRESSED], image.get()); + alignedFree(hostPtr); + + if (HwHelper::renderCompressedImagesSupported(hwInfo)) { + EXPECT_TRUE(containsHint(expectedHint, userData)); + } else { + EXPECT_FALSE(containsHint(expectedHint, userData)); + } +} + +TEST_F(PerformanceHintTest, givenImageWithNoGmmWhenItsCreatedThenNoPerformanceHintIsProvided) { + HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo(); + hwInfo.capabilityTable.ftrRenderCompressedImages = true; + + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); + cl_device_id deviceId = static_cast(device.get()); + + cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; + auto context = std::unique_ptr(Context::create(validProperties, DeviceVector(&deviceId, 1), callbackFunction, static_cast(userData), retVal)); + + const size_t width = 5; + const size_t height = 3; + const size_t depth = 2; + cl_int retVal = CL_SUCCESS; + auto const elementSize = 4; + char *hostPtr = static_cast(alignedMalloc(width * height * depth * elementSize * 2, 64)); + + cl_image_format imageFormat; + cl_image_desc imageDesc; + + auto mockBuffer = std::unique_ptr(new MockBuffer()); + cl_mem mem = mockBuffer.get(); + + imageFormat.image_channel_data_type = CL_UNORM_INT8; + imageFormat.image_channel_order = CL_RGBA; + imageDesc.num_mip_levels = 0; + imageDesc.num_samples = 0; + imageDesc.mem_object = mem; + imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + imageDesc.image_width = width; + imageDesc.image_height = 0; + imageDesc.image_depth = 0; + imageDesc.image_array_size = 0; + imageDesc.image_row_pitch = 0; + imageDesc.image_slice_pitch = 0; + + cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; + auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat); + + auto image = std::unique_ptr(Image::create( + context.get(), + flags, + surfaceFormat, + &imageDesc, + hostPtr, + retVal)); + + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[IMAGE_IS_COMPRESSED], image.get()); + EXPECT_FALSE(containsHint(expectedHint, userData)); + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[IMAGE_IS_NOT_COMPRESSED], image.get()); + EXPECT_FALSE(containsHint(expectedHint, userData)); + + alignedFree(hostPtr); +} + +TEST_F(PerformanceHintTest, givenUncompressedImageWhenItsCreatedThenProperPerformanceHintIsProvided) { + HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo(); + hwInfo.capabilityTable.ftrRenderCompressedImages = true; + + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); + cl_device_id deviceId = static_cast(device.get()); + + cl_context_properties validProperties[3] = {CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL, CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL, 0}; + auto context = std::unique_ptr(Context::create(validProperties, DeviceVector(&deviceId, 1), callbackFunction, static_cast(userData), retVal)); + + const size_t width = 5; + const size_t height = 3; + const size_t depth = 2; + cl_int retVal = CL_SUCCESS; + auto const elementSize = 4; + char *hostPtr = static_cast(alignedMalloc(width * height * depth * elementSize * 2, 64)); + + cl_image_format imageFormat; + cl_image_desc imageDesc; + auto mockBuffer = std::unique_ptr(new MockBuffer()); + StorageInfo info; + size_t t = 4; + auto gmm = std::unique_ptr(new Gmm((const void *)nullptr, t, false, true, true, info)); + gmm->isRenderCompressed = false; + + mockBuffer->getGraphicsAllocation()->setDefaultGmm(gmm.get()); + cl_mem mem = mockBuffer.get(); + imageFormat.image_channel_data_type = CL_UNORM_INT8; + imageFormat.image_channel_order = CL_RGBA; + imageDesc.num_mip_levels = 0; + imageDesc.num_samples = 0; + imageDesc.mem_object = mem; + imageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + imageDesc.image_width = width; + imageDesc.image_height = 0; + imageDesc.image_depth = 0; + imageDesc.image_array_size = 0; + imageDesc.image_row_pitch = 0; + imageDesc.image_slice_pitch = 0; + + cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR; + auto surfaceFormat = Image::getSurfaceFormatFromTable(flags, &imageFormat); + + auto image = std::unique_ptr(Image::create( + context.get(), + flags, + surfaceFormat, + &imageDesc, + hostPtr, + retVal)); + + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[IMAGE_IS_NOT_COMPRESSED], image.get()); + alignedFree(hostPtr); + + if (HwHelper::renderCompressedImagesSupported(hwInfo)) { + EXPECT_TRUE(containsHint(expectedHint, userData)); + } else { + EXPECT_FALSE(containsHint(expectedHint, userData)); + } +} + TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenContextProvidesProperHint) { auto pDevice = castToObject(devices[0]); diff --git a/unit_tests/context/driver_diagnostics_tests.h b/unit_tests/context/driver_diagnostics_tests.h index e114c6f351..3cf1672f80 100644 --- a/unit_tests/context/driver_diagnostics_tests.h +++ b/unit_tests/context/driver_diagnostics_tests.h @@ -24,7 +24,7 @@ using namespace NEO; -const int maxHintCounter = 4; +const int maxHintCounter = 6; bool containsHint(const char *providedHint, char *userData);