Report compressed unified-memory allocations for aux translation

Summary of what this patch does:

  * driver_diagnostics.cpp / driver_diagnostics.h
    Adds the KERNEL_ALLOCATION_AUX_TRANSLATION performance hint. The format
    string and the enum entry are both inserted right after
    KERNEL_ARGUMENT_AUX_TRANSLATION and before BUFFER_IS_COMPRESSED, so the
    hintFormat[] table and the PerformanceHints enum stay in the same order.
    The hint takes three arguments: kernel name (%s), allocation pointer (%p)
    and allocation size (%u).

  * kernel.cpp, Kernel::fillWithKernelObjsForAuxTranslation
    When the EnableStatelessCompression debug flag is set, every entry of
    kernelUnifiedMemoryGfxAllocations whose allocation type is
    BUFFER_COMPRESSED is inserted into kernelObjsForAuxTranslation as a
    GFX_ALLOC object. If the program's context is providing performance
    hints, a KERNEL_ALLOCATION_AUX_TRANSLATION hint is emitted at
    CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL with the kernel name, the
    allocation's GPU address and its underlying buffer size.

  * driver_diagnostics_tests.cpp
    Two tests: with PrintDriverDiagnostics enabled the expected hint text is
    produced and stdout is non-empty; without it nothing is printed.

  * kernel_arg_buffer_tests.cpp
    For BUFFER, BUFFER_COMPRESSED and BUFFER_HOST_MEMORY allocations set via
    setUnifiedMemoryExecInfo, only BUFFER_COMPRESSED yields a (single)
    GFX_ALLOC entry in the aux-translation set.

Notes for review:

  * The GPU address is cast with reinterpret_cast<void *> because the hint
    format consumes it with %p.
  * NOTE(review): the size is formatted with %u while the value comes from
    getUnderlyingBufferSize(); its return type is not visible in this patch.
    Confirm it matches %u on all supported targets.
  * NOTE(review): both new PerformanceHintTest cases call
    castToObject(devices[0]) without an explicit template argument. Confirm
    this resolves as intended in driver_diagnostics_tests.cpp.
  * NOTE(review): indentation of the code lines below follows a 4-space
    convention and may not match the target files byte-for-byte; if a hunk is
    rejected, retry with whitespace-insensitive matching.

diff --git a/opencl/source/context/driver_diagnostics.cpp b/opencl/source/context/driver_diagnostics.cpp
index 7b0b731065..3314a1b795 100644
--- a/opencl/source/context/driver_diagnostics.cpp
+++ b/opencl/source/context/driver_diagnostics.cpp
@@ -57,6 +57,7 @@ const char *const DriverDiagnostics::hintFormat[] = {
     "Performance hint: Kernel %s private memory usage is too high and exhausts register space, additional surface needs to be allocated of size %u, consider reducing amount of private memory used, avoid using private memory arrays.", //PRIVATE_MEMORY_USAGE_TOO_HIGH
     "Performance hint: Kernel %s submission requires coherency with CPU; this will impact performance.", //KERNEL_REQUIRES_COHERENCY
     "Performance hint: Kernel %s requires aux translation on argument [%u] = \"%s\"", //KERNEL_ARGUMENT_AUX_TRANSLATION
+    "Performance hint: Kernel %s requires aux translation for allocation with pointer %p and size %u", //KERNEL_ALLOCATION_AUX_TRANSLATION
     "Performance hint: Buffer %p will use compressed memory.", //BUFFER_IS_COMPRESSED
     "Performance hint: Buffer %p will not use compressed memory.", //BUFFER_IS_NOT_COMPRESSED
     "Performance hint: Image %p will use compressed memory.", //IMAGE_IS_COMPRESSED
diff --git a/opencl/source/context/driver_diagnostics.h b/opencl/source/context/driver_diagnostics.h
index 668a1be2db..550a44ccbc 100644
--- a/opencl/source/context/driver_diagnostics.h
+++ b/opencl/source/context/driver_diagnostics.h
@@ -48,6 +48,7 @@ enum PerformanceHints {
     PRIVATE_MEMORY_USAGE_TOO_HIGH,
     KERNEL_REQUIRES_COHERENCY,
     KERNEL_ARGUMENT_AUX_TRANSLATION,
+    KERNEL_ALLOCATION_AUX_TRANSLATION,
     BUFFER_IS_COMPRESSED,
     BUFFER_IS_NOT_COMPRESSED,
     IMAGE_IS_COMPRESSED,
diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp
index 0a2cdcc6fc..2b3939cefc 100644
--- a/opencl/source/kernel/kernel.cpp
+++ b/opencl/source/kernel/kernel.cpp
@@ -2468,6 +2468,19 @@ void Kernel::fillWithKernelObjsForAuxTranslation(KernelObjsForAuxTranslation &ke
             }
         }
     }
+    if (DebugManager.flags.EnableStatelessCompression.get()) {
+        for (auto gfxAllocation : kernelUnifiedMemoryGfxAllocations) {
+            if (gfxAllocation->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
+                kernelObjsForAuxTranslation.insert({KernelObjForAuxTranslation::Type::GFX_ALLOC, gfxAllocation});
+                auto &context = this->program->getContext();
+                if (context.isProvidingPerformanceHints()) {
+                    context.providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, KERNEL_ALLOCATION_AUX_TRANSLATION,
+                                                   kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(),
+                                                   reinterpret_cast<void *>(gfxAllocation->getGpuAddress()), gfxAllocation->getUnderlyingBufferSize());
+                }
+            }
+        }
+    }
 }
 
 bool Kernel::hasDirectStatelessAccessToHostMemory() const {
diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp
index 3d5589a594..55d42e5e42 100644
--- a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp
+++ b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp
@@ -490,6 +490,34 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallF
     EXPECT_TRUE(containsHint(expectedHint, userData));
 }
 
+TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallFillWithKernelObjsForAuxTranslationOnUnifiedMemoryThenContextProvidesProperHint) {
+    DebugManagerStateRestore dbgRestore;
+    DebugManager.flags.PrintDriverDiagnostics.set(1);
+    DebugManager.flags.EnableStatelessCompression.set(1);
+
+    auto pDevice = castToObject(devices[0]);
+    MockKernelWithInternals mockKernel(*pDevice, context);
+    char data[128];
+    void *ptr = &data;
+    MockGraphicsAllocation gfxAllocation(ptr, 128);
+
+    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
+
+    mockKernel.mockKernel->initialize();
+    mockKernel.mockKernel->setUnifiedMemoryExecInfo(&gfxAllocation);
+
+    testing::internal::CaptureStdout();
+    KernelObjsForAuxTranslation kernelObjects;
+    mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects);
+
+    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_ALLOCATION_AUX_TRANSLATION],
+             mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), ptr, 128);
+
+    std::string output = testing::internal::GetCapturedStdout();
+    EXPECT_NE(0u, output.size());
+    EXPECT_TRUE(containsHint(expectedHint, userData));
+}
+
 TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenKernelObjectWithGraphicsAllocationAccessedStatefullyOnlyThenDontReportAnyHint) {
     DebugManagerStateRestore dbgRestore;
     DebugManager.flags.PrintDriverDiagnostics.set(1);
@@ -563,6 +591,29 @@ TEST_F(PerformanceHintTest, whenCallingFillWithKernelObjsForAuxTranslationOnNull
     EXPECT_EQ(0u, output.size());
 }
 
+TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeDisabledWhenCallFillWithKernelObjsForAuxTranslationOnUnifiedMemoryThenDontReportAnyHint) {
+    DebugManagerStateRestore dbgRestore;
+    DebugManager.flags.EnableStatelessCompression.set(1);
+
+    auto pDevice = castToObject(devices[0]);
+    MockKernelWithInternals mockKernel(*pDevice, context);
+    char data[128];
+    void *ptr = &data;
+    MockGraphicsAllocation gfxAllocation(ptr, 128);
+
+    gfxAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
+
+    mockKernel.mockKernel->initialize();
+    mockKernel.mockKernel->setUnifiedMemoryExecInfo(&gfxAllocation);
+
+    testing::internal::CaptureStdout();
+    KernelObjsForAuxTranslation kernelObjects;
+    mockKernel.mockKernel->fillWithKernelObjsForAuxTranslation(kernelObjects);
+
+    std::string output = testing::internal::GetCapturedStdout();
+    EXPECT_EQ(0u, output.size());
+}
+
 TEST_F(PerformanceHintTest, given64bitCompressedBufferWhenItsCreatedThenProperPerformanceHintIsProvided) {
     cl_int retVal;
     HardwareInfo hwInfo = context->getDevice(0)->getHardwareInfo();
diff --git a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp
index 1d21cdafaf..7eaffddfae 100644
--- a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp
+++ b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp
@@ -605,6 +605,41 @@ TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithIndirectSta
     }
 }
 
+TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithIndirectStatelessAccessWhenFillWithKernelObjsForAuxTranslationIsCalledThenSetKernelObjectsForAuxTranslation) {
+    DebugManagerStateRestore debugRestorer;
+    DebugManager.flags.EnableStatelessCompression.set(1);
+
+    pKernelInfo->hasIndirectStatelessAccess = true;
+
+    const auto allocationTypes = {GraphicsAllocation::AllocationType::BUFFER,
+                                  GraphicsAllocation::AllocationType::BUFFER_COMPRESSED,
+                                  GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY};
+
+    MockGraphicsAllocation gfxAllocation;
+
+    for (const auto type : allocationTypes) {
+        gfxAllocation.setAllocationType(type);
+
+        pKernel->setUnifiedMemoryExecInfo(&gfxAllocation);
+
+        KernelObjsForAuxTranslation kernelObjsForAuxTranslation;
+        pKernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
+
+        if (type == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
+            EXPECT_EQ(1u, kernelObjsForAuxTranslation.size());
+            auto kernelObj = *kernelObjsForAuxTranslation.find({KernelObjForAuxTranslation::Type::GFX_ALLOC, &gfxAllocation});
+            EXPECT_NE(nullptr, kernelObj.object);
+            EXPECT_EQ(KernelObjForAuxTranslation::Type::GFX_ALLOC, kernelObj.type);
+            kernelObjsForAuxTranslation.erase(kernelObj);
+        } else {
+            EXPECT_EQ(0u, kernelObjsForAuxTranslation.size());
+        }
+
+        pKernel->clearUnifiedMemoryExecInfo();
+        pKernel->setAuxTranslationRequired(false);
+    }
+}
+
 class KernelArgBufferFixtureBindless : public KernelArgBufferFixture {
   public:
     void SetUp() {