From f2d0606301d5c8edc888a233cf57248e9edfce60 Mon Sep 17 00:00:00 2001 From: Piotr Maciejewski Date: Wed, 13 Nov 2019 14:48:44 +0100 Subject: [PATCH] Performance Counters: gen12 support Change-Id: I40ccf4958636fc9ffa9778c7abd9e1663c4bb7e4 --- runtime/command_queue/gpgpu_walker_base.inl | 20 +++++++++----- .../command_queue/gpgpu_walker_bdw_plus.inl | 9 +++++-- runtime/gen11/hw_helper_gen11.cpp | 5 ---- runtime/gen12lp/hw_helper_gen12lp.cpp | 5 ++++ runtime/instrumentation/instrumentation.h | 21 +++++++++++++-- runtime/os_interface/metrics_library.cpp | 6 +++++ runtime/os_interface/performance_counters.cpp | 7 +++-- runtime/os_interface/performance_counters.h | 4 +-- .../performance_counters_tests.cpp | 26 +++++++++---------- 9 files changed, 71 insertions(+), 32 deletions(-) diff --git a/runtime/command_queue/gpgpu_walker_base.inl b/runtime/command_queue/gpgpu_walker_base.inl index 2a6967cfe5..085cf520a8 100644 --- a/runtime/command_queue/gpgpu_walker_base.inl +++ b/runtime/command_queue/gpgpu_walker_base.inl @@ -17,11 +17,13 @@ #include "runtime/device/device_info.h" #include "runtime/event/perf_counter.h" #include "runtime/event/user_event.h" +#include "runtime/helpers/engine_node_helper.h" #include "runtime/helpers/hardware_commands_helper.h" #include "runtime/helpers/queue_helpers.h" #include "runtime/helpers/validators.h" #include "runtime/indirect_heap/indirect_heap.h" #include "runtime/mem_obj/mem_obj.h" +#include "runtime/os_interface/os_context.h" #include "runtime/utilities/tag_allocator.h" #include @@ -156,11 +158,14 @@ void GpgpuWalkerHelper::dispatchPerfCountersCommandsStart( TagNode &hwPerfCounter, LinearStream *commandStream) { - auto pPerformanceCounters = commandQueue.getPerfCounters(); - const uint32_t size = pPerformanceCounters->getGpuCommandsSize(true); + const auto pPerformanceCounters = commandQueue.getPerfCounters(); + const auto commandBufferType = isCcs(commandQueue.getDevice().getDefaultEngine().osContext->getEngineType()) + ? MetricsLibraryApi::GpuCommandBufferType::Compute + : MetricsLibraryApi::GpuCommandBufferType::Render; + const uint32_t size = pPerformanceCounters->getGpuCommandsSize(commandBufferType, true); void *pBuffer = commandStream->getSpace(size); - pPerformanceCounters->getGpuCommands(hwPerfCounter, true, size, pBuffer); + pPerformanceCounters->getGpuCommands(commandBufferType, hwPerfCounter, true, size, pBuffer); } template @@ -169,11 +174,14 @@ void GpgpuWalkerHelper::dispatchPerfCountersCommandsEnd( TagNode &hwPerfCounter, LinearStream *commandStream) { - auto pPerformanceCounters = commandQueue.getPerfCounters(); - const uint32_t size = pPerformanceCounters->getGpuCommandsSize(false); + const auto pPerformanceCounters = commandQueue.getPerfCounters(); + const auto commandBufferType = isCcs(commandQueue.getDevice().getDefaultEngine().osContext->getEngineType()) + ? MetricsLibraryApi::GpuCommandBufferType::Compute + : MetricsLibraryApi::GpuCommandBufferType::Render; + const uint32_t size = pPerformanceCounters->getGpuCommandsSize(commandBufferType, false); void *pBuffer = commandStream->getSpace(size); - pPerformanceCounters->getGpuCommands(hwPerfCounter, false, size, pBuffer); + pPerformanceCounters->getGpuCommands(commandBufferType, hwPerfCounter, false, size, pBuffer); } template diff --git a/runtime/command_queue/gpgpu_walker_bdw_plus.inl b/runtime/command_queue/gpgpu_walker_bdw_plus.inl index b9f88fbed9..f44a6cf805 100644 --- a/runtime/command_queue/gpgpu_walker_bdw_plus.inl +++ b/runtime/command_queue/gpgpu_walker_bdw_plus.inl @@ -194,8 +194,13 @@ size_t EnqueueOperation::getSizeRequiredCSKernel(bool reserveProfilin size += 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); } if (reservePerfCounters) { - size += commandQueue.getPerfCounters()->getGpuCommandsSize(true); - size += commandQueue.getPerfCounters()->getGpuCommandsSize(false); + + const auto commandBufferType = isCcs(commandQueue.getDevice().getDefaultEngine().osContext->getEngineType()) + ? MetricsLibraryApi::GpuCommandBufferType::Compute + : MetricsLibraryApi::GpuCommandBufferType::Render; + + size += commandQueue.getPerfCounters()->getGpuCommandsSize(commandBufferType, true); + size += commandQueue.getPerfCounters()->getGpuCommandsSize(commandBufferType, false); } size += GpgpuWalkerHelper::getSizeForWADisableLSQCROPERFforOCL(pKernel); diff --git a/runtime/gen11/hw_helper_gen11.cpp b/runtime/gen11/hw_helper_gen11.cpp index 87c5592226..55d98ba53e 100644 --- a/runtime/gen11/hw_helper_gen11.cpp +++ b/runtime/gen11/hw_helper_gen11.cpp @@ -17,11 +17,6 @@ uint32_t HwHelperHw::getComputeUnitsUsedForScratch(const HardwareInfo *p return pHwInfo->gtSystemInfo.MaxSubSlicesSupported * pHwInfo->gtSystemInfo.MaxEuPerSubSlice * 8; } -template <> -uint32_t HwHelperHw::getMetricsLibraryGenId() const { - return static_cast(MetricsLibraryApi::ClientGen::Gen11); -} - template class AubHelperHw; template class HwHelperHw; template class FlatBatchBufferHelperHw; diff --git a/runtime/gen12lp/hw_helper_gen12lp.cpp b/runtime/gen12lp/hw_helper_gen12lp.cpp index c28d0e8374..001bba99bc 100644 --- a/runtime/gen12lp/hw_helper_gen12lp.cpp +++ b/runtime/gen12lp/hw_helper_gen12lp.cpp @@ -81,6 +81,11 @@ uint32_t HwHelperHw::getPitchAlignmentForImage(const HardwareInfo *hwInf return 4u; } +template <> +uint32_t HwHelperHw::getMetricsLibraryGenId() const { + return static_cast(MetricsLibraryApi::ClientGen::Gen12); +} + template <> const std::vector HwHelperHw::getGpgpuEngineInstances() const { constexpr std::array gpgpuEngineInstances = {{aub_stream::ENGINE_RCS, diff --git a/runtime/instrumentation/instrumentation.h b/runtime/instrumentation/instrumentation.h index d8b112e601..7101389c7b 100644 --- a/runtime/instrumentation/instrumentation.h +++ b/runtime/instrumentation/instrumentation.h @@ -23,7 +23,8 @@ namespace MetricsLibraryApi { enum class ClientApi : uint32_t { OpenCL }; enum class ClientGen : uint32_t { Unknown, Gen9, - Gen11 }; + Gen11, + Gen12 }; enum class ValueType : uint32_t { Uint32 }; enum class GpuConfigurationActivationType : uint32_t { Tbs, EscapeCode }; @@ -35,7 +36,12 @@ enum class ParameterType : uint32_t { QueryHwCountersReportApiSize, enum class StatusCode : uint32_t { Failed, IncorrectObject, Success }; -enum class GpuCommandBufferType : uint32_t { Render }; +enum class GpuCommandBufferType : uint32_t { Render, + Compute }; + +enum class ClientOptionsType : uint32_t { + Compute +}; // Dummy handles. struct Handle { @@ -60,11 +66,22 @@ struct ClientDataLinux_1_0 { void *Reserved; }; +struct ClientOptionsComputeData_1_0 { + bool Asynchronous; +}; + +struct ClientOptionsData_1_0 { + ClientOptionsType Type; + ClientOptionsComputeData_1_0 Compute; +}; + struct ClientData_1_0 { union { ClientDataWindows_1_0 Windows; ClientDataLinux_1_0 Linux; }; + ClientOptionsData_1_0 *ClientOptions; + uint32_t ClientOptionsCount; }; struct ConfigurationActivateData_1_0 { diff --git a/runtime/os_interface/metrics_library.cpp b/runtime/os_interface/metrics_library.cpp index 1105505b51..6d6cb6a689 100644 --- a/runtime/os_interface/metrics_library.cpp +++ b/runtime/os_interface/metrics_library.cpp @@ -54,6 +54,12 @@ bool MetricsLibrary::contextCreate( ContextCreateData_1_0 &createData, ContextHandle_1_0 &handle) { + MetricsLibraryApi::ClientOptionsData_1_0 clientOptions[1] = {}; + clientOptions[0].Type = MetricsLibraryApi::ClientOptionsType::Compute; + clientOptions[0].Compute.Asynchronous = true; + clientData.ClientOptionsCount = 1; + clientData.ClientOptions = clientOptions; + createData.Api = &api->functions; createData.ClientCallbacks = &api->callbacks; createData.ClientData = &clientData; diff --git a/runtime/os_interface/performance_counters.cpp b/runtime/os_interface/performance_counters.cpp index 2ad972432c..ff7b0ddde4 100644 --- a/runtime/os_interface/performance_counters.cpp +++ b/runtime/os_interface/performance_counters.cpp @@ -152,6 +152,7 @@ QueryHandle_1_0 PerformanceCounters::getQueryHandle() { // PerformanceCounters::getGpuCommandsSize ////////////////////////////////////////////////////// uint32_t PerformanceCounters::getGpuCommandsSize( + const MetricsLibraryApi::GpuCommandBufferType commandBufferType, const bool begin) { CommandBufferData_1_0 bufferData = {}; CommandBufferSize_1_0 bufferSize = {}; @@ -165,8 +166,8 @@ uint32_t PerformanceCounters::getGpuCommandsSize( } bufferData.HandleContext = context; - bufferData.Type = GpuCommandBufferType::Render; bufferData.CommandsType = ObjectType::QueryHwCounters; + bufferData.Type = commandBufferType; bufferData.QueryHwCounters.Begin = begin; bufferData.QueryHwCounters.Handle = getQueryHandle(); @@ -181,17 +182,19 @@ uint32_t PerformanceCounters::getGpuCommandsSize( // PerformanceCounters::getGpuCommands ////////////////////////////////////////////////////// bool PerformanceCounters::getGpuCommands( + const MetricsLibraryApi::GpuCommandBufferType commandBufferType, TagNode &performanceCounters, const bool begin, const uint32_t bufferSize, void *pBuffer) { + // Command Buffer data. CommandBufferData_1_0 bufferData = {}; bufferData.HandleContext = context; - bufferData.Type = GpuCommandBufferType::Render; bufferData.CommandsType = ObjectType::QueryHwCounters; bufferData.Data = pBuffer; bufferData.Size = bufferSize; + bufferData.Type = commandBufferType; // Gpu memory allocation for query hw counters. bufferData.Allocation.CpuAddress = reinterpret_cast(performanceCounters.tagForCpuAccess); diff --git a/runtime/os_interface/performance_counters.h b/runtime/os_interface/performance_counters.h index 541ad5ae58..7b7b2220e8 100644 --- a/runtime/os_interface/performance_counters.h +++ b/runtime/os_interface/performance_counters.h @@ -48,8 +48,8 @@ class PerformanceCounters { ////////////////////////////////////////////////////// // Gpu commands. ////////////////////////////////////////////////////// - uint32_t getGpuCommandsSize(const bool begin); - bool getGpuCommands(TagNode &performanceCounters, const bool begin, const uint32_t bufferSize, void *pBuffer); + uint32_t getGpuCommandsSize(const MetricsLibraryApi::GpuCommandBufferType commandBufferType, const bool begin); + bool getGpuCommands(const MetricsLibraryApi::GpuCommandBufferType commandBufferType, TagNode &performanceCounters, const bool begin, const uint32_t bufferSize, void *pBuffer); ///////////////////////////////////////////////////// // Gpu/Api reports. diff --git a/unit_tests/os_interface/performance_counters_tests.cpp b/unit_tests/os_interface/performance_counters_tests.cpp index 57cf4e5c96..b45e440009 100644 --- a/unit_tests/os_interface/performance_counters_tests.cpp +++ b/unit_tests/os_interface/performance_counters_tests.cpp @@ -186,7 +186,7 @@ TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetric EXPECT_TRUE(performanceCountersBase->isAvailable()); // Obtain required command buffer size. - uint32_t commandsSize = performanceCountersBase->getGpuCommandsSize(true); + uint32_t commandsSize = performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true); EXPECT_NE(0u, commandsSize); // Fill command buffer. @@ -194,7 +194,7 @@ TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetric HwPerfCounter perfCounter = {}; TagNode query = {}; query.tagForCpuAccess = &perfCounter; - EXPECT_TRUE(performanceCountersBase->getGpuCommands(query, true, sizeof(buffer), buffer)); + EXPECT_TRUE(performanceCountersBase->getGpuCommands(MetricsLibraryApi::GpuCommandBufferType::Render, query, true, sizeof(buffer), buffer)); // Close library. performanceCountersBase->shutdown(); @@ -208,7 +208,7 @@ TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetric performanceCountersBase->enable(); // Obtain required command buffer size. - uint32_t commandsSize = performanceCountersBase->getGpuCommandsSize(true); + uint32_t commandsSize = performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true); EXPECT_EQ(0u, commandsSize); // Close library. @@ -334,7 +334,7 @@ TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetric EXPECT_TRUE(performanceCountersBase->isAvailable()); // Obtain required command buffer size. - uint32_t commandsSize = performanceCountersBase->getGpuCommandsSize(true); + uint32_t commandsSize = performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true); EXPECT_NE(0u, commandsSize); // Fill command buffer. @@ -342,7 +342,7 @@ TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetric TagNode query = {}; HwPerfCounter perfCounter = {}; query.tagForCpuAccess = &perfCounter; - EXPECT_TRUE(performanceCountersBase->getGpuCommands(query, true, sizeof(buffer), buffer)); + EXPECT_TRUE(performanceCountersBase->getGpuCommands(MetricsLibraryApi::GpuCommandBufferType::Render, query, true, sizeof(buffer), buffer)); // Obtain api report size. uint32_t apiReportSize = performanceCountersBase->getApiReportSize(); @@ -405,8 +405,8 @@ TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenOaConf auto metricLibraryApi = static_cast(performanceCountersBase->getMetricsLibraryContext().data); metricLibraryApi->validCreateConfigurationOa = false; - EXPECT_EQ(0u, performanceCountersBase->getGpuCommandsSize(true)); - EXPECT_GT(performanceCountersBase->getGpuCommandsSize(false), 0u); + EXPECT_EQ(0u, performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true)); + EXPECT_GT(performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, false), 0u); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); @@ -421,8 +421,8 @@ TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenMetric auto metricLibraryApi = static_cast(performanceCountersBase->getMetricsLibraryContext().data); metricLibraryApi->validGpuReportSize = false; - EXPECT_EQ(0u, performanceCountersBase->getGpuCommandsSize(true)); - EXPECT_EQ(0u, performanceCountersBase->getGpuCommandsSize(false)); + EXPECT_EQ(0u, performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true)); + EXPECT_EQ(0u, performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, false)); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); @@ -438,8 +438,8 @@ TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenAllCon metricLibraryApi->validCreateConfigurationOa = true; metricLibraryApi->validCreateConfigurationUser = true; - EXPECT_GT(performanceCountersBase->getGpuCommandsSize(true), 0u); - EXPECT_GT(performanceCountersBase->getGpuCommandsSize(false), 0u); + EXPECT_GT(performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true), 0u); + EXPECT_GT(performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, false), 0u); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber()); @@ -454,8 +454,8 @@ TEST_F(PerformanceCountersMetricsLibraryTest, givenPerformanceCountersWhenOaConf auto metricLibraryApi = static_cast(performanceCountersBase->getMetricsLibraryContext().data); metricLibraryApi->validActivateConfigurationOa = false; - EXPECT_EQ(0u, performanceCountersBase->getGpuCommandsSize(true)); - EXPECT_GT(performanceCountersBase->getGpuCommandsSize(false), 0u); + EXPECT_EQ(0u, performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, true)); + EXPECT_GT(performanceCountersBase->getGpuCommandsSize(MetricsLibraryApi::GpuCommandBufferType::Render, false), 0u); performanceCountersBase->shutdown(); EXPECT_EQ(0u, performanceCountersBase->getReferenceNumber());