diff --git a/runtime/context/driver_diagnostics.cpp b/runtime/context/driver_diagnostics.cpp index 0398709279..c37019600e 100644 --- a/runtime/context/driver_diagnostics.cpp +++ b/runtime/context/driver_diagnostics.cpp @@ -53,6 +53,7 @@ const char *DriverDiagnostics::hintFormat[] = { "Performance hint: Local workgroup sizes { %u, %u, %u } selected for this workload ( kernel name: %s ) may not be optimal, consider using following local workgroup size: { %u, %u, %u }.", //BAD_LOCAL_WORKGROUP_SIZE "Performance hint: Kernel %s register pressure is too high, spill fills will be generated, additional surface needs to be allocated of size %u, consider simplifying your kernel.", //REGISTER_PRESSURE_TOO_HIGH "Performance hint: Kernel %s private memory usage is too high and exhausts register space, additional surface needs to be allocated of size %u, consider reducing amount of private memory used, avoid using private memory arrays.", //PRIVATE_MEMORY_USAGE_TOO_HIGH - "Performance hint: Kernel %s submission requires coherency with CPU; this will impact performance." //KERNEL_REQUIRES_COHERENCY + "Performance hint: Kernel %s submission requires coherency with CPU; this will impact performance.", //KERNEL_REQUIRES_COHERENCY + "Performance hint: Kernel %s requires aux translation on argument [%u] = \"%s\"" //KERNEL_ARGUMENT_AUX_TRANSLATION }; } // namespace NEO diff --git a/runtime/context/driver_diagnostics.h b/runtime/context/driver_diagnostics.h index ddbf38ff8f..70f905ae49 100644 --- a/runtime/context/driver_diagnostics.h +++ b/runtime/context/driver_diagnostics.h @@ -46,7 +46,8 @@ enum PerformanceHints { BAD_LOCAL_WORKGROUP_SIZE, REGISTER_PRESSURE_TOO_HIGH, PRIVATE_MEMORY_USAGE_TOO_HIGH, - KERNEL_REQUIRES_COHERENCY + KERNEL_REQUIRES_COHERENCY, + KERNEL_ARGUMENT_AUX_TRANSLATION }; class DriverDiagnostics { diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp index 45e7fe2296..dfe231bff9 100644 --- a/runtime/kernel/kernel.cpp +++ b/runtime/kernel/kernel.cpp @@ -2149,6 +2149,12 @@ void Kernel::fillWithBuffersForAuxTranslation(MemObjsForAuxTranslation &memObjsF auto buffer = castToObject(getKernelArg(i)); if (buffer && buffer->getGraphicsAllocation()->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) { memObjsForAuxTranslation.insert(buffer); + + auto &context = this->program->getContext(); + if (context.isProvidingPerformanceHints()) { + context.providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, KERNEL_ARGUMENT_AUX_TRANSLATION, + kernelInfo.name.c_str(), i, kernelInfo.kernelArgInfo.at(i).name.c_str()); + } } } } diff --git a/unit_tests/context/driver_diagnostics_tests.cpp b/unit_tests/context/driver_diagnostics_tests.cpp index 699c31b53b..29df60a40d 100644 --- a/unit_tests/context/driver_diagnostics_tests.cpp +++ b/unit_tests/context/driver_diagnostics_tests.cpp @@ -424,6 +424,35 @@ TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenConte context->release(); } +TEST_F(PerformanceHintTest, givenPrintDriverDiagnosticsDebugModeEnabledWhenCallFillWithBuffersForAuxTranslationThenContextProvidesProperHint) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.PrintDriverDiagnostics.set(1); + + auto pDevice = castToObject(devices[0]); + MockKernelWithInternals mockKernel(*pDevice, context); + MockBuffer buffer; + cl_mem clMem = &buffer; + + buffer.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); + mockKernel.kernelInfo.kernelArgInfo.resize(1); + mockKernel.kernelInfo.kernelArgInfo.at(0).kernelArgPatchInfoVector.resize(1); + mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false; + mockKernel.mockKernel->initialize(); + mockKernel.mockKernel->auxTranslationRequired = true; + mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem); + + testing::internal::CaptureStdout(); + MemObjsForAuxTranslation memObjects; + mockKernel.mockKernel->fillWithBuffersForAuxTranslation(memObjects); + + snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_ARGUMENT_AUX_TRANSLATION], + mockKernel.mockKernel->getKernelInfo().name.c_str(), 0, mockKernel.mockKernel->getKernelInfo().kernelArgInfo.at(0).name.c_str()); + + std::string output = testing::internal::GetCapturedStdout(); + EXPECT_NE(0u, output.size()); + EXPECT_TRUE(containsHint(expectedHint, userData)); +} + TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenContextProvidesProperHint) { auto pDevice = castToObject(devices[0]);