mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
Metrics Library Performance Counters implementation.
Signed-off-by: Piotr Maciejewski <piotr.maciejewski@intel.com> Change-Id: I0f00dca1892f4857baaebc75ba2208a4f33db1bf
This commit is contained in:
committed by
sys_ocldev
parent
369982995d
commit
d1d794c658
@@ -88,10 +88,6 @@ endif()
|
||||
|
||||
target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC GMM_LIB_DLL DEFAULT_PLATFORM=${DEFAULT_SUPPORTED_PLATFORM})
|
||||
|
||||
if(INSTRUMENTATION_LIB_NAME)
|
||||
add_dependencies(${NEO_STATIC_LIB_NAME} ${INSTRUMENTATION_LIB_NAME})
|
||||
endif()
|
||||
|
||||
list(APPEND LIB_FLAGS_DEFINITIONS -DCIF_HEADERS_ONLY_BUILD ${SUPPORTED_GEN_FLAGS_DEFINITONS})
|
||||
|
||||
target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC ${LIB_FLAGS_DEFINITIONS})
|
||||
@@ -149,15 +145,10 @@ if(${GENERATE_EXECUTABLE})
|
||||
|
||||
add_subdirectory(dll)
|
||||
|
||||
if(HAVE_INSTRUMENTATION)
|
||||
target_link_libraries(${NEO_DYNAMIC_LIB_NAME} ${INSTRUMENTATION_LIB_NAME})
|
||||
endif()
|
||||
|
||||
target_link_libraries(${NEO_DYNAMIC_LIB_NAME} ${NEO_STATIC_LIB_NAME} ${IGDRCL_EXTRA_LIBS})
|
||||
|
||||
target_include_directories(${NEO_DYNAMIC_LIB_NAME} BEFORE PRIVATE
|
||||
${CMAKE_CURRENT_BINARY_DIR}
|
||||
${INSTRUMENTATION_INCLUDE_PATH}
|
||||
${AUB_STREAM_DIR}/..
|
||||
)
|
||||
|
||||
|
||||
@@ -3374,25 +3374,8 @@ clSetPerformanceConfigurationINTEL(
|
||||
cl_uint count,
|
||||
cl_uint *offsets,
|
||||
cl_uint *values) {
|
||||
Device *pDevice = nullptr;
|
||||
|
||||
auto retVal = validateObjects(WithCastToInternal(device, &pDevice));
|
||||
|
||||
API_ENTER(&retVal);
|
||||
DBG_LOG_INPUTS("device", device,
|
||||
"count", count,
|
||||
"offsets", offsets,
|
||||
"values", values);
|
||||
if (CL_SUCCESS != retVal) {
|
||||
return retVal;
|
||||
}
|
||||
if (!pDevice->getHardwareInfo().capabilityTable.instrumentationEnabled) {
|
||||
retVal = CL_PROFILING_INFO_NOT_AVAILABLE;
|
||||
return retVal;
|
||||
}
|
||||
auto perfCounters = pDevice->getPerformanceCounters();
|
||||
retVal = perfCounters->sendPerfConfiguration(count, offsets, values);
|
||||
return retVal;
|
||||
// Not supported, covered by the Metrics Library DLL.
|
||||
return CL_INVALID_OPERATION;
|
||||
}
|
||||
|
||||
void *clHostMemAllocINTEL(
|
||||
|
||||
@@ -90,9 +90,6 @@ CommandQueue::~CommandQueue() {
|
||||
}
|
||||
delete commandStream;
|
||||
|
||||
if (perfConfigurationData) {
|
||||
delete perfConfigurationData;
|
||||
}
|
||||
if (this->perfCountersEnabled) {
|
||||
device->getPerformanceCounters()->shutdown();
|
||||
}
|
||||
@@ -275,44 +272,32 @@ bool CommandQueue::setPerfCountersEnabled(bool perfCountersEnabled, cl_uint conf
|
||||
if (perfCountersEnabled == this->perfCountersEnabled) {
|
||||
return true;
|
||||
}
|
||||
// Only dynamic configuration (set 0) is supported.
|
||||
const uint32_t dynamicSet = 0;
|
||||
if (configuration != dynamicSet) {
|
||||
return false;
|
||||
}
|
||||
auto perfCounters = device->getPerformanceCounters();
|
||||
|
||||
if (perfCountersEnabled) {
|
||||
perfCounters->enable();
|
||||
if (!perfCounters->isAvailable()) {
|
||||
perfCounters->shutdown();
|
||||
return false;
|
||||
}
|
||||
perfConfigurationData = perfCounters->getPmRegsCfg(configuration);
|
||||
if (perfConfigurationData == nullptr) {
|
||||
perfCounters->shutdown();
|
||||
return false;
|
||||
}
|
||||
InstrReadRegsCfg *pUserCounters = &perfConfigurationData->ReadRegs;
|
||||
for (uint32_t i = 0; i < pUserCounters->RegsCount; ++i) {
|
||||
perfCountersUserRegistersNumber++;
|
||||
if (pUserCounters->Reg[i].BitSize > 32) {
|
||||
perfCountersUserRegistersNumber++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (perfCounters->isAvailable()) {
|
||||
perfCounters->shutdown();
|
||||
}
|
||||
perfCounters->shutdown();
|
||||
}
|
||||
this->perfCountersConfig = configuration;
|
||||
|
||||
this->perfCountersEnabled = perfCountersEnabled;
|
||||
|
||||
return true;
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
PerformanceCounters *CommandQueue::getPerfCounters() {
|
||||
return device->getPerformanceCounters();
|
||||
}
|
||||
|
||||
bool CommandQueue::sendPerfCountersConfig() {
|
||||
return getPerfCounters()->sendPmRegsCfgCommands(perfConfigurationData, &perfCountersRegsCfgHandle, &perfCountersRegsCfgPending);
|
||||
}
|
||||
|
||||
cl_int CommandQueue::enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest) {
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
|
||||
|
||||
@@ -12,8 +12,6 @@
|
||||
#include "runtime/helpers/engine_control.h"
|
||||
#include "runtime/helpers/task_information.h"
|
||||
|
||||
#include "instrumentation.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
|
||||
@@ -374,24 +372,14 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
return perfCountersEnabled;
|
||||
}
|
||||
|
||||
InstrPmRegsCfg *getPerfCountersConfigData() {
|
||||
return perfConfigurationData;
|
||||
}
|
||||
|
||||
PerformanceCounters *getPerfCounters();
|
||||
|
||||
bool sendPerfCountersConfig();
|
||||
|
||||
bool setPerfCountersEnabled(bool perfCountersEnabled, cl_uint configuration);
|
||||
|
||||
void setIsSpecialCommandQueue(bool newValue) {
|
||||
this->isSpecialCommandQueue = newValue;
|
||||
}
|
||||
|
||||
uint16_t getPerfCountersUserRegistersNumber() const {
|
||||
return perfCountersUserRegistersNumber;
|
||||
}
|
||||
|
||||
QueuePriority getPriority() const {
|
||||
return priority;
|
||||
}
|
||||
@@ -462,11 +450,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
QueueThrottle throttle = QueueThrottle::MEDIUM;
|
||||
|
||||
bool perfCountersEnabled = false;
|
||||
cl_uint perfCountersConfig = std::numeric_limits<uint32_t>::max();
|
||||
uint32_t perfCountersUserRegistersNumber = 0;
|
||||
InstrPmRegsCfg *perfConfigurationData = nullptr;
|
||||
uint32_t perfCountersRegsCfgHandle = 0;
|
||||
bool perfCountersRegsCfgPending = false;
|
||||
|
||||
LinearStream *commandStream = nullptr;
|
||||
|
||||
|
||||
@@ -403,11 +403,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
||||
if (event && this->isProfilingEnabled()) {
|
||||
// Get allocation for timestamps
|
||||
hwTimeStamps = event->getHwTimeStampNode();
|
||||
if (this->isPerfCountersEnabled()) {
|
||||
hwPerfCounter = event->getHwPerfCounterNode();
|
||||
// PERF COUNTER: copy current configuration from queue to event
|
||||
event->copyPerfCounters(this->getPerfCountersConfigData());
|
||||
}
|
||||
}
|
||||
|
||||
if (parentKernel) {
|
||||
@@ -421,6 +416,10 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
||||
}
|
||||
}
|
||||
|
||||
if (event && this->isPerfCountersEnabled()) {
|
||||
hwPerfCounter = event->getHwPerfCounterNode();
|
||||
}
|
||||
|
||||
HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
*this,
|
||||
multiDispatchInfo,
|
||||
|
||||
@@ -175,24 +175,6 @@ class GpgpuWalkerHelper {
|
||||
uint32_t aluRegister,
|
||||
uint32_t operation,
|
||||
uint32_t mask);
|
||||
|
||||
static void dispatchStoreRegisterCommand(
|
||||
LinearStream *commandStream,
|
||||
uint64_t memoryAddress,
|
||||
uint32_t registerAddress);
|
||||
|
||||
static void dispatchPerfCountersGeneralPurposeCounterCommands(
|
||||
LinearStream *commandStream,
|
||||
uint64_t baseAddress);
|
||||
|
||||
static void dispatchPerfCountersUserCounterCommands(
|
||||
CommandQueue &commandQueue,
|
||||
LinearStream *commandStream,
|
||||
uint64_t baseAddress);
|
||||
|
||||
static void dispatchPerfCountersOABufferStateCommands(
|
||||
TagNode<HwPerfCounter> &hwPerfCounter,
|
||||
LinearStream *commandStream);
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -24,8 +24,6 @@
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
#include "runtime/utilities/tag_allocator.h"
|
||||
|
||||
#include "instrumentation.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
@@ -149,102 +147,17 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
|
||||
pMICmdLow->setMemoryAddress(timeStampAddress);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void GpgpuWalkerHelper<GfxFamily>::dispatchStoreRegisterCommand(
|
||||
LinearStream *commandStream,
|
||||
uint64_t memoryAddress,
|
||||
uint32_t registerAddress) {
|
||||
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
|
||||
auto pCmd = commandStream->getSpaceForCmd<MI_STORE_REGISTER_MEM>();
|
||||
*pCmd = GfxFamily::cmdInitStoreRegisterMem;
|
||||
pCmd->setRegisterAddress(registerAddress);
|
||||
pCmd->setMemoryAddress(memoryAddress);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersGeneralPurposeCounterCommands(
|
||||
LinearStream *commandStream,
|
||||
uint64_t baseAddress) {
|
||||
|
||||
// Read General Purpose counters
|
||||
for (auto i = 0u; i < NEO::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT; i++) {
|
||||
uint32_t regAddr = INSTR_GFX_OFFSETS::INSTR_PERF_CNT_1_DW0 + i * sizeof(cl_uint);
|
||||
//Gp field is 2*uint64 wide so it can hold 4 uint32
|
||||
uint64_t address = baseAddress + i * sizeof(cl_uint);
|
||||
dispatchStoreRegisterCommand(commandStream, address, regAddr);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersUserCounterCommands(
|
||||
CommandQueue &commandQueue,
|
||||
LinearStream *commandStream,
|
||||
uint64_t baseAddress) {
|
||||
|
||||
auto userRegs = &commandQueue.getPerfCountersConfigData()->ReadRegs;
|
||||
|
||||
for (uint32_t i = 0; i < userRegs->RegsCount; i++) {
|
||||
uint32_t regAddr = userRegs->Reg[i].Offset;
|
||||
//offset between base (low) registers is cl_ulong wide
|
||||
uint64_t address = baseAddress + i * sizeof(cl_ulong);
|
||||
dispatchStoreRegisterCommand(commandStream, address, regAddr);
|
||||
|
||||
if (userRegs->Reg[i].BitSize > 32) {
|
||||
dispatchStoreRegisterCommand(commandStream, address + sizeof(cl_uint), regAddr + sizeof(cl_uint));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersOABufferStateCommands(
|
||||
TagNode<HwPerfCounter> &hwPerfCounter,
|
||||
LinearStream *commandStream) {
|
||||
|
||||
dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.OaStatus), INSTR_GFX_OFFSETS::INSTR_OA_STATUS);
|
||||
dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.OaHead), INSTR_GFX_OFFSETS::INSTR_OA_HEAD_PTR);
|
||||
dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.OaTail), INSTR_GFX_OFFSETS::INSTR_OA_TAIL_PTR);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersCommandsStart(
|
||||
CommandQueue &commandQueue,
|
||||
TagNode<HwPerfCounter> &hwPerfCounter,
|
||||
LinearStream *commandStream) {
|
||||
|
||||
using MI_REPORT_PERF_COUNT = typename GfxFamily::MI_REPORT_PERF_COUNT;
|
||||
auto pPerformanceCounters = commandQueue.getPerfCounters();
|
||||
const uint32_t size = pPerformanceCounters->getGpuCommandsSize(true);
|
||||
void *pBuffer = commandStream->getSpace(size);
|
||||
|
||||
auto perfCounters = commandQueue.getPerfCounters();
|
||||
|
||||
uint32_t currentReportId = perfCounters->getCurrentReportId();
|
||||
uint64_t address = 0;
|
||||
//flush command streamer
|
||||
auto pPipeControlCmd = commandStream->getSpaceForCmd<PIPE_CONTROL>();
|
||||
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
|
||||
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||
|
||||
//Store value of NOOPID register
|
||||
GpgpuWalkerHelper<GfxFamily>::dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.DMAFenceIdBegin), INSTR_MMIO_NOOPID);
|
||||
|
||||
//Read Core Frequency
|
||||
GpgpuWalkerHelper<GfxFamily>::dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.CoreFreqBegin), INSTR_MMIO_RPSTAT1);
|
||||
|
||||
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersGeneralPurposeCounterCommands(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportBegin.Gp));
|
||||
|
||||
auto pReportPerfCount = commandStream->getSpaceForCmd<MI_REPORT_PERF_COUNT>();
|
||||
*pReportPerfCount = GfxFamily::cmdInitReportPerfCount;
|
||||
pReportPerfCount->setReportId(currentReportId);
|
||||
address = hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportBegin.Oa);
|
||||
pReportPerfCount->setMemoryAddress(address);
|
||||
|
||||
address = hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWTimeStamp.GlobalStartTS);
|
||||
|
||||
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, 0llu, false);
|
||||
|
||||
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersUserCounterCommands(commandQueue, commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportBegin.User));
|
||||
|
||||
commandQueue.sendPerfCountersConfig();
|
||||
pPerformanceCounters->getGpuCommands(hwPerfCounter, true, size, pBuffer);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
@@ -253,40 +166,11 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersCommandsEnd(
|
||||
TagNode<HwPerfCounter> &hwPerfCounter,
|
||||
LinearStream *commandStream) {
|
||||
|
||||
using MI_REPORT_PERF_COUNT = typename GfxFamily::MI_REPORT_PERF_COUNT;
|
||||
auto pPerformanceCounters = commandQueue.getPerfCounters();
|
||||
const uint32_t size = pPerformanceCounters->getGpuCommandsSize(false);
|
||||
void *pBuffer = commandStream->getSpace(size);
|
||||
|
||||
auto perfCounters = commandQueue.getPerfCounters();
|
||||
|
||||
uint32_t currentReportId = perfCounters->getCurrentReportId();
|
||||
|
||||
//flush command streamer
|
||||
auto pPipeControlCmd = commandStream->getSpaceForCmd<PIPE_CONTROL>();
|
||||
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
|
||||
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||
|
||||
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersOABufferStateCommands(hwPerfCounter, commandStream);
|
||||
|
||||
//Timestamp: Global End
|
||||
uint64_t address = hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWTimeStamp.GlobalEndTS);
|
||||
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, 0llu, false);
|
||||
|
||||
auto pReportPerfCount = commandStream->getSpaceForCmd<MI_REPORT_PERF_COUNT>();
|
||||
*pReportPerfCount = GfxFamily::cmdInitReportPerfCount;
|
||||
pReportPerfCount->setReportId(currentReportId);
|
||||
address = hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.Oa);
|
||||
pReportPerfCount->setMemoryAddress(address);
|
||||
|
||||
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersGeneralPurposeCounterCommands(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.Gp));
|
||||
|
||||
//Store value of NOOPID register
|
||||
GpgpuWalkerHelper<GfxFamily>::dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.DMAFenceIdEnd), INSTR_MMIO_NOOPID);
|
||||
|
||||
//Read Core Frequency
|
||||
GpgpuWalkerHelper<GfxFamily>::dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.CoreFreqEnd), INSTR_MMIO_RPSTAT1);
|
||||
|
||||
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersUserCounterCommands(commandQueue, commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.User));
|
||||
|
||||
perfCounters->setCpuTimestamp();
|
||||
pPerformanceCounters->getGpuCommands(hwPerfCounter, false, size, pBuffer);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -189,31 +189,8 @@ size_t EnqueueOperation<GfxFamily>::getSizeRequiredCSKernel(bool reserveProfilin
|
||||
size += 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
}
|
||||
if (reservePerfCounters) {
|
||||
//start cmds
|
||||
//P_C: flush CS & TimeStamp BEGIN
|
||||
size += 2 * sizeof(PIPE_CONTROL);
|
||||
//SRM NOOPID & Frequency
|
||||
size += 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
//gp registers
|
||||
size += NEO::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
//report perf count
|
||||
size += sizeof(typename GfxFamily::MI_REPORT_PERF_COUNT);
|
||||
//user registers
|
||||
size += commandQueue.getPerfCountersUserRegistersNumber() * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
|
||||
//end cmds
|
||||
//P_C: flush CS & TimeStamp END;
|
||||
size += 2 * sizeof(PIPE_CONTROL);
|
||||
//OA buffer (status head, tail)
|
||||
size += 3 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
//report perf count
|
||||
size += sizeof(typename GfxFamily::MI_REPORT_PERF_COUNT);
|
||||
//gp registers
|
||||
size += NEO::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
//SRM NOOPID & Frequency
|
||||
size += 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
//user registers
|
||||
size += commandQueue.getPerfCountersUserRegistersNumber() * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
|
||||
size += commandQueue.getPerfCounters()->getGpuCommandsSize(true);
|
||||
size += commandQueue.getPerfCounters()->getGpuCommandsSize(false);
|
||||
}
|
||||
size += GpgpuWalkerHelper<GfxFamily>::getSizeForWADisableLSQCROPERFforOCL(pKernel);
|
||||
|
||||
|
||||
@@ -414,9 +414,9 @@ TagAllocator<HwTimeStamps> *CommandStreamReceiver::getEventTsAllocator() {
|
||||
return profilingTimeStampAllocator.get();
|
||||
}
|
||||
|
||||
TagAllocator<HwPerfCounter> *CommandStreamReceiver::getEventPerfCountAllocator() {
|
||||
TagAllocator<HwPerfCounter> *CommandStreamReceiver::getEventPerfCountAllocator(const uint32_t tagSize) {
|
||||
if (perfCounterAllocator.get() == nullptr) {
|
||||
perfCounterAllocator = std::make_unique<TagAllocator<HwPerfCounter>>(getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize);
|
||||
perfCounterAllocator = std::make_unique<TagAllocator<HwPerfCounter>>(getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, tagSize);
|
||||
}
|
||||
return perfCounterAllocator.get();
|
||||
}
|
||||
|
||||
@@ -162,7 +162,7 @@ class CommandStreamReceiver {
|
||||
OsContext &getOsContext() const { return *osContext; }
|
||||
|
||||
TagAllocator<HwTimeStamps> *getEventTsAllocator();
|
||||
TagAllocator<HwPerfCounter> *getEventPerfCountAllocator();
|
||||
TagAllocator<HwPerfCounter> *getEventPerfCountAllocator(const uint32_t tagSize);
|
||||
TagAllocator<TimestampPacketStorage> *getTimestampPacketAllocator();
|
||||
|
||||
virtual cl_int expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation);
|
||||
|
||||
@@ -122,8 +122,7 @@ bool Device::createDeviceImpl() {
|
||||
auto &hwInfo = getHardwareInfo();
|
||||
if (osTime->getOSInterface()) {
|
||||
if (hwInfo.capabilityTable.instrumentationEnabled) {
|
||||
performanceCounters = createPerformanceCountersFunc(osTime.get());
|
||||
performanceCounters->initialize(&hwInfo);
|
||||
performanceCounters = createPerformanceCountersFunc(this);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,4 +20,11 @@ const char *gmmEntryName = GMM_ENTRY_NAME;
|
||||
|
||||
const char *sysFsPciPath = "/sys/bus/pci/devices/";
|
||||
const char *tbxLibName = "libtbxAccess.so";
|
||||
|
||||
// Os specific Metrics Library name
|
||||
#if __x86_64__ || __ppc64__
|
||||
const char *metricsLibraryDllName = "libigdml64.so";
|
||||
#else
|
||||
const char *metricsLibraryDllName = "libigdml32.so";
|
||||
#endif
|
||||
} // namespace Os
|
||||
|
||||
@@ -15,4 +15,11 @@ const char *igcDllName = IGC_LIBRARY_NAME;
|
||||
const char *gdiDllName = "gdi32.dll";
|
||||
const char *gmmDllName = GMM_UMD_DLL;
|
||||
const char *gmmEntryName = GMM_ENTRY_NAME;
|
||||
|
||||
// Os specific Metrics Library name
|
||||
#if _WIN64
|
||||
const char *metricsLibraryDllName = "igdml64.dll";
|
||||
#else
|
||||
const char *metricsLibraryDllName = "igdml32.dll";
|
||||
#endif
|
||||
} // namespace Os
|
||||
|
||||
@@ -26,6 +26,8 @@
|
||||
#include "runtime/utilities/stackvec.h"
|
||||
#include "runtime/utilities/tag_allocator.h"
|
||||
|
||||
#define OCLRT_NUM_TIMESTAMP_BITS (32)
|
||||
|
||||
namespace NEO {
|
||||
|
||||
const cl_uint Event::eventNotReady = 0xFFFFFFF0;
|
||||
@@ -136,9 +138,6 @@ Event::~Event() {
|
||||
if (ctx != nullptr) {
|
||||
ctx->decRefInternal();
|
||||
}
|
||||
if (perfConfigurationData) {
|
||||
delete perfConfigurationData;
|
||||
}
|
||||
|
||||
// in case event did not unblock child events before
|
||||
unblockEventsBlockedByThis(executionStatus);
|
||||
@@ -201,12 +200,10 @@ cl_int Event::getEventProfilingInfo(cl_profiling_info paramName,
|
||||
if (!perfCountersEnabled) {
|
||||
return CL_INVALID_VALUE;
|
||||
}
|
||||
if (!cmdQueue->getPerfCounters()->processEventReport(paramValueSize,
|
||||
paramValue,
|
||||
paramValueSizeRet,
|
||||
getHwPerfCounterNode()->tagForCpuAccess,
|
||||
perfConfigurationData,
|
||||
updateStatusAndCheckCompletion())) {
|
||||
if (!cmdQueue->getPerfCounters()->getApiReport(paramValueSize,
|
||||
paramValue,
|
||||
paramValueSizeRet,
|
||||
updateStatusAndCheckCompletion())) {
|
||||
return CL_PROFILING_INFO_NOT_AVAILABLE;
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
@@ -701,17 +698,14 @@ TagNode<HwTimeStamps> *Event::getHwTimeStampNode() {
|
||||
}
|
||||
|
||||
TagNode<HwPerfCounter> *Event::getHwPerfCounterNode() {
|
||||
if (!perfCounterNode) {
|
||||
perfCounterNode = cmdQueue->getCommandStreamReceiver().getEventPerfCountAllocator()->getTag();
|
||||
|
||||
if (!perfCounterNode && cmdQueue->getPerfCounters()) {
|
||||
const uint32_t gpuReportSize = cmdQueue->getPerfCounters()->getGpuReportSize();
|
||||
perfCounterNode = cmdQueue->getCommandStreamReceiver().getEventPerfCountAllocator(gpuReportSize)->getTag();
|
||||
}
|
||||
return perfCounterNode;
|
||||
}
|
||||
|
||||
void Event::copyPerfCounters(InstrPmRegsCfg *config) {
|
||||
perfConfigurationData = new InstrPmRegsCfg;
|
||||
memcpy_s(perfConfigurationData, sizeof(InstrPmRegsCfg), config, sizeof(InstrPmRegsCfg));
|
||||
}
|
||||
|
||||
void Event::addTimestampPacketNodes(const TimestampPacketContainer &inputTimestampPacketContainer) {
|
||||
timestampPacketContainer->assignAndIncrementNodesRefCounts(inputTimestampPacketContainer);
|
||||
}
|
||||
|
||||
@@ -21,8 +21,6 @@
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#define OCLRT_NUM_TIMESTAMP_BITS (32)
|
||||
|
||||
namespace NEO {
|
||||
template <typename TagType>
|
||||
struct TagNode;
|
||||
@@ -121,8 +119,6 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
||||
this->perfCountersEnabled = perfCountersEnabled;
|
||||
}
|
||||
|
||||
void copyPerfCounters(InstrPmRegsCfg *config);
|
||||
|
||||
TagNode<HwPerfCounter> *getHwPerfCounterNode();
|
||||
|
||||
std::unique_ptr<FlushStampTracker> flushStamp;
|
||||
@@ -375,7 +371,6 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
|
||||
TagNode<HwTimeStamps> *timeStampNode = nullptr;
|
||||
TagNode<HwPerfCounter> *perfCounterNode = nullptr;
|
||||
std::unique_ptr<TimestampPacketContainer> timestampPacketContainer;
|
||||
InstrPmRegsCfg *perfConfigurationData = nullptr;
|
||||
//number of events this event depends on
|
||||
std::atomic<int> parentCount;
|
||||
//event parents
|
||||
|
||||
@@ -10,20 +10,22 @@
|
||||
#include "runtime/event/hw_timestamps.h"
|
||||
#include "runtime/memory_manager/graphics_allocation.h"
|
||||
|
||||
#include "instrumentation.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
struct HwPerfCounter {
|
||||
void initialize() {
|
||||
HWPerfCounters = {};
|
||||
HWTimeStamp.initialize();
|
||||
report[0] = 0;
|
||||
}
|
||||
|
||||
static GraphicsAllocation::AllocationType getAllocationType() {
|
||||
return GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER;
|
||||
}
|
||||
bool canBeReleased() const { return true; }
|
||||
HwPerfCounters HWPerfCounters;
|
||||
HwTimeStamps HWTimeStamp;
|
||||
|
||||
// Gpu report size is not known during compile time.
|
||||
// Such information will be provided by metrics library dll.
|
||||
// The variable below will be allocated dynamically based on information
|
||||
// from the metrics library. Take a look at CommandStreamReceiver::getEventPerfCountAllocator.
|
||||
uint8_t report[1] = {};
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -17,6 +17,11 @@ uint32_t HwHelperHw<Family>::getComputeUnitsUsedForScratch(const HardwareInfo *p
|
||||
return pHwInfo->gtSystemInfo.MaxSubSlicesSupported * pHwInfo->gtSystemInfo.MaxEuPerSubSlice * 8;
|
||||
}
|
||||
|
||||
// Explicit specialization for this file's `Family`: reports the Metrics
// Library client generation for the hardware family.
// Returns MetricsLibraryApi::ClientGen::Gen11 converted to uint32_t.
template <>
|
||||
uint32_t HwHelperHw<Family>::getMetricsLibraryGenId() const {
|
||||
return static_cast<uint32_t>(MetricsLibraryApi::ClientGen::Gen11);
|
||||
}
|
||||
|
||||
template class AubHelperHw<Family>;
|
||||
template class HwHelperHw<Family>;
|
||||
template class FlatBatchBufferHelperHw<Family>;
|
||||
|
||||
@@ -33,6 +33,11 @@ void PipeControlHelper<Family>::addPipeControlWA(LinearStream &commandStream) {
|
||||
pCmd->setCommandStreamerStallEnable(true);
|
||||
}
|
||||
|
||||
// Explicit specialization for this file's `Family`: reports the Metrics
// Library client generation for the hardware family.
// Returns MetricsLibraryApi::ClientGen::Gen9 converted to uint32_t.
template <>
|
||||
uint32_t HwHelperHw<Family>::getMetricsLibraryGenId() const {
|
||||
return static_cast<uint32_t>(MetricsLibraryApi::ClientGen::Gen9);
|
||||
}
|
||||
|
||||
template class AubHelperHw<Family>;
|
||||
template class HwHelperHw<Family>;
|
||||
template class FlatBatchBufferHelperHw<Family>;
|
||||
|
||||
@@ -64,6 +64,7 @@ class HwHelper {
|
||||
virtual bool getEnableLocalMemory(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual std::string getExtensions() const = 0;
|
||||
static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo);
|
||||
virtual uint32_t getMetricsLibraryGenId() const = 0;
|
||||
|
||||
static constexpr uint32_t lowPriorityGpgpuEngineIndex = 1;
|
||||
|
||||
@@ -155,6 +156,8 @@ class HwHelperHw : public HwHelper {
|
||||
|
||||
std::string getExtensions() const override;
|
||||
|
||||
uint32_t getMetricsLibraryGenId() const override;
|
||||
|
||||
protected:
|
||||
HwHelperHw() = default;
|
||||
};
|
||||
|
||||
@@ -204,4 +204,9 @@ int PipeControlHelper<GfxFamily>::getRequiredPipeControlSize() {
|
||||
return pipeControlCount * sizeof(typename GfxFamily::PIPE_CONTROL);
|
||||
}
|
||||
|
||||
// Generic definition of getMetricsLibraryGenId for any GfxFamily.
// Returns MetricsLibraryApi::ClientGen::Gen9 converted to uint32_t; a family
// that supplies its own explicit specialization of this member uses that
// specialization instead of this definition.
template <typename GfxFamily>
|
||||
uint32_t HwHelperHw<GfxFamily>::getMetricsLibraryGenId() const {
|
||||
return static_cast<uint32_t>(MetricsLibraryApi::ClientGen::Gen9);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -9,86 +9,4 @@
|
||||
|
||||
namespace NEO {
|
||||
const bool haveInstrumentation = false;
|
||||
|
||||
bool instrAutoSamplingStart(
|
||||
InstrEscCbData cbData,
|
||||
void **ppOAInterface) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool instrAutoSamplingStop(
|
||||
void **ppOAInterface) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool instrCheckPmRegsCfg(
|
||||
InstrPmRegsCfg *pQueryPmRegsCfg,
|
||||
uint32_t *pLastPmRegsCfgHandle,
|
||||
const void *pASInterface) {
|
||||
return false;
|
||||
}
|
||||
|
||||
void instrGetPerfCountersQueryData(
|
||||
InstrEscCbData cbData,
|
||||
GTDI_QUERY *pData,
|
||||
HwPerfCounters *pLayout,
|
||||
uint64_t cpuRawTimestamp,
|
||||
void *pASInterface,
|
||||
InstrPmRegsCfg *pPmRegsCfg,
|
||||
bool useMiRPC,
|
||||
bool resetASData,
|
||||
const InstrAllowedContexts *pAllowedContexts) {
|
||||
}
|
||||
|
||||
bool instrEscGetPmRegsCfg(
|
||||
InstrEscCbData cbData,
|
||||
uint32_t cfgId,
|
||||
InstrPmRegsCfg *pCfg,
|
||||
InstrAutoSamplingMode *pAutoSampling) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool instrEscHwMetricsEnable(
|
||||
InstrEscCbData cbData,
|
||||
bool enable) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool instrEscLoadPmRegsCfg(
|
||||
InstrEscCbData cbData,
|
||||
InstrPmRegsCfg *pCfg,
|
||||
bool hardwareAccess) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool instrEscSetPmRegsCfg(
|
||||
InstrEscCbData cbData,
|
||||
uint32_t count,
|
||||
uint32_t *pOffsets,
|
||||
uint32_t *pValues) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool instrEscSendReadRegsCfg(
|
||||
InstrEscCbData cbData,
|
||||
uint32_t count,
|
||||
uint32_t *pOffsets,
|
||||
uint32_t *pBitSizes) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool instrSetAvailable(bool enabled) {
|
||||
return false;
|
||||
}
|
||||
|
||||
void instrEscVerifyEnable(
|
||||
InstrEscCbData cbData) {
|
||||
}
|
||||
|
||||
uint32_t instrSetPlatformInfo(
|
||||
uint32_t productId,
|
||||
void *featureTable) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -10,201 +10,162 @@
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
constexpr unsigned int INSTR_GENERAL_PURPOSE_COUNTERS_COUNT = 4;
|
||||
constexpr unsigned int INSTR_MAX_USER_COUNTERS_COUNT = 32;
|
||||
constexpr unsigned int INSTR_MMIO_NOOPID = 0x2094;
|
||||
constexpr unsigned int INSTR_MMIO_RPSTAT1 = 0xA01C;
|
||||
|
||||
constexpr unsigned int INSTR_GTDI_MAX_READ_REGS = 16;
|
||||
constexpr unsigned int INSTR_GTDI_PERF_METRICS_OA_COUNT = 36;
|
||||
constexpr unsigned int INSTR_GTDI_PERF_METRICS_OA_40b_COUNT = 32;
|
||||
constexpr unsigned int INSTR_GTDI_PERF_METRICS_NOA_COUNT = 16;
|
||||
constexpr unsigned int INSTR_MAX_CONTEXT_TAGS = 128;
|
||||
|
||||
constexpr unsigned int INSTR_MAX_OA_PROLOG = 2;
|
||||
constexpr unsigned int INSTR_MAX_OA_EPILOG = 2;
|
||||
constexpr unsigned int INSTR_MAX_PM_REGS_BASE = 256;
|
||||
constexpr unsigned int INSTR_MAX_PM_REGS = (INSTR_MAX_PM_REGS_BASE + INSTR_MAX_OA_PROLOG + INSTR_MAX_OA_EPILOG);
|
||||
|
||||
constexpr unsigned int INSTR_PM_REGS_CFG_INVALID = 0;
|
||||
constexpr unsigned int INSTR_READ_REGS_CFG_TAG = 0xFFFFFFFE;
|
||||
constexpr unsigned int INSTR_MAX_READ_REGS = 16;
|
||||
|
||||
extern const bool haveInstrumentation;
|
||||
} // namespace NEO
|
||||
|
||||
typedef enum {
|
||||
INSTR_AS_MODE_OFF,
|
||||
INSTR_AS_MODE_EVENT,
|
||||
INSTR_AS_MODE_TIMER,
|
||||
INSTR_AS_MODE_DMA
|
||||
} InstrAutoSamplingMode;
|
||||
namespace MetricsLibraryApi {
|
||||
// Dummy macros.
|
||||
#define ML_STDCALL
|
||||
#define METRICS_LIBRARY_CONTEXT_CREATE_1_0 "create"
|
||||
#define METRICS_LIBRARY_CONTEXT_DELETE_1_0 "delete"
|
||||
|
||||
typedef enum GTDI_CONFIGURATION_SET {
|
||||
GTDI_CONFIGURATION_SET_DYNAMIC = 0,
|
||||
GTDI_CONFIGURATION_SET_1,
|
||||
GTDI_CONFIGURATION_SET_2,
|
||||
GTDI_CONFIGURATION_SET_3,
|
||||
GTDI_CONFIGURATION_SET_4,
|
||||
GTDI_CONFIGURATION_SET_COUNT,
|
||||
GTDI_CONFIGURATION_SET_MAX = 0xFFFFFFFF
|
||||
} GTDI_CONFIGURATION_SET;
|
||||
// Dummy enumerators.
|
||||
enum class ClientApi : uint32_t { OpenCL };
|
||||
enum class ClientGen : uint32_t { Unknown,
|
||||
Gen9,
|
||||
Gen11 };
|
||||
enum class ValueType : uint32_t { Uint32 };
|
||||
enum class GpuConfigurationActivationType : uint32_t { Tbs,
|
||||
EscapeCode };
|
||||
enum class ObjectType : uint32_t { QueryHwCounters,
|
||||
ConfigurationHwCountersUser,
|
||||
ConfigurationHwCountersOa };
|
||||
enum class ParameterType : uint32_t { QueryHwCountersReportApiSize,
|
||||
QueryHwCountersReportGpuSize };
|
||||
enum class StatusCode : uint32_t { Failed,
|
||||
IncorrectObject,
|
||||
Success };
|
||||
enum class GpuCommandBufferType : uint32_t { Render };
|
||||
|
||||
enum INSTR_GFX_OFFSETS {
|
||||
INSTR_PERF_CNT_1_DW0 = 0x91B8,
|
||||
INSTR_PERF_CNT_1_DW1 = 0x91BC,
|
||||
INSTR_PERF_CNT_2_DW0 = 0x91C0,
|
||||
INSTR_PERF_CNT_2_DW1 = 0x91C4,
|
||||
INSTR_OA_STATUS = 0x2B08,
|
||||
INSTR_OA_HEAD_PTR = 0x2B0C,
|
||||
INSTR_OA_TAIL_PTR = 0x2B10
|
||||
// Dummy handles.
|
||||
struct Handle {
|
||||
void *data;
|
||||
bool IsValid() const { return data != nullptr; } // NOLINT
|
||||
};
|
||||
struct QueryHandle_1_0 : Handle {};
|
||||
struct ConfigurationHandle_1_0 : Handle {};
|
||||
struct ContextHandle_1_0 : Handle {};
|
||||
|
||||
// Dummy structures.
|
||||
struct ClientCallbacks_1_0 {};
|
||||
|
||||
struct ClientDataWindows_1_0 {
|
||||
void *Device;
|
||||
void *Adapter;
|
||||
void *Escape;
|
||||
bool KmdInstrumentationEnabled;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
struct ClientDataLinux_1_0 {
|
||||
void *Reserved;
|
||||
};
|
||||
|
||||
} GTDI_QUERY;
|
||||
struct ClientData_1_0 {
|
||||
union {
|
||||
ClientDataWindows_1_0 Windows;
|
||||
ClientDataLinux_1_0 Linux;
|
||||
};
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
uint32_t contextId[INSTR_MAX_CONTEXT_TAGS];
|
||||
uint32_t count;
|
||||
} InstrAllowedContexts;
|
||||
struct ConfigurationActivateData_1_0 {
|
||||
GpuConfigurationActivationType Type;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
uint64_t counter[INSTR_GTDI_MAX_READ_REGS];
|
||||
uint32_t userCntrCfgId;
|
||||
} InstrReportDataUser;
|
||||
struct ClientType_1_0 {
|
||||
ClientApi Api;
|
||||
ClientGen Gen;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
uint32_t reportId;
|
||||
uint32_t timestamp;
|
||||
uint32_t contextId;
|
||||
uint32_t gpuTicksCounter;
|
||||
} InstrReportDataOaHeader;
|
||||
struct TypedValue_1_0 {
|
||||
uint32_t ValueUInt32;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
uint32_t oaCounter[INSTR_GTDI_PERF_METRICS_OA_COUNT];
|
||||
uint8_t oaCounterHB[INSTR_GTDI_PERF_METRICS_OA_40b_COUNT];
|
||||
uint32_t noaCounter[INSTR_GTDI_PERF_METRICS_NOA_COUNT];
|
||||
} InstrReportDataOaData;
|
||||
struct GpuMemory_1_0 {
|
||||
uint64_t GpuAddress;
|
||||
void *CpuAddress;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
InstrReportDataOaHeader header;
|
||||
InstrReportDataOaData data;
|
||||
} InstrReportDataOa;
|
||||
struct CommandBufferQueryHwCounters_1_0 {
|
||||
QueryHandle_1_0 Handle;
|
||||
ConfigurationHandle_1_0 HandleUserConfiguration;
|
||||
bool Begin;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
uint64_t counter1;
|
||||
uint64_t counter2;
|
||||
} InstrReportDataMonitor;
|
||||
struct CommandBufferSize_1_0 {
|
||||
uint32_t GpuMemorySize;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
InstrReportDataMonitor Gp;
|
||||
InstrReportDataUser User;
|
||||
InstrReportDataOa Oa;
|
||||
} InstrReportData;
|
||||
struct ConfigurationCreateData_1_0 {
|
||||
ContextHandle_1_0 HandleContext;
|
||||
ObjectType Type;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
uint32_t DMAFenceIdBegin;
|
||||
uint32_t DMAFenceIdEnd;
|
||||
uint32_t CoreFreqBegin;
|
||||
uint32_t CoreFreqEnd;
|
||||
InstrReportData HwPerfReportBegin;
|
||||
InstrReportData HwPerfReportEnd;
|
||||
uint32_t OaStatus;
|
||||
uint32_t OaHead;
|
||||
uint32_t OaTail;
|
||||
} HwPerfCounters;
|
||||
struct CommandBufferData_1_0 {
|
||||
ContextHandle_1_0 HandleContext;
|
||||
ObjectType CommandsType;
|
||||
GpuCommandBufferType Type;
|
||||
GpuMemory_1_0 Allocation;
|
||||
void *Data;
|
||||
uint32_t Size;
|
||||
CommandBufferQueryHwCounters_1_0 QueryHwCounters;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
uint32_t Offset;
|
||||
uint32_t BitSize;
|
||||
} InstrPmReg;
|
||||
struct QueryCreateData_1_0 {
|
||||
ContextHandle_1_0 HandleContext;
|
||||
ObjectType Type;
|
||||
uint32_t Slots;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
uint32_t Handle;
|
||||
uint32_t RegsCount;
|
||||
} InstrPmRegsOaCountersCfg;
|
||||
struct GetReportQuery_1_0 {
|
||||
QueryHandle_1_0 Handle;
|
||||
|
||||
typedef struct {
|
||||
uint32_t Handle;
|
||||
uint32_t RegsCount;
|
||||
} InstrPmRegsGpCountersCfg;
|
||||
uint32_t Slot;
|
||||
uint32_t SlotsCount;
|
||||
|
||||
typedef struct {
|
||||
InstrPmReg Reg[INSTR_MAX_READ_REGS];
|
||||
uint32_t RegsCount;
|
||||
} InstrReadRegsCfg;
|
||||
uint32_t DataSize;
|
||||
void *Data;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
InstrPmRegsOaCountersCfg OaCounters;
|
||||
InstrPmRegsGpCountersCfg GpCounters;
|
||||
InstrReadRegsCfg ReadRegs;
|
||||
} InstrPmRegsCfg;
|
||||
struct GetReportData_1_0 {
|
||||
ObjectType Type;
|
||||
GetReportQuery_1_0 Query;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
void *hAdapter;
|
||||
void *hDevice;
|
||||
void *pfnEscapeCb;
|
||||
bool DDI;
|
||||
} InstrEscCbData;
|
||||
struct ContextCreateData_1_0 {
|
||||
ClientData_1_0 *ClientData;
|
||||
ClientCallbacks_1_0 *ClientCallbacks;
|
||||
struct Interface_1_0 *Api;
|
||||
};
|
||||
|
||||
bool instrAutoSamplingStart(
|
||||
InstrEscCbData cbData,
|
||||
void **ppOAInterface);
|
||||
// Dummy functions.
|
||||
using ContextCreateFunction_1_0 = StatusCode(ML_STDCALL *)(ClientType_1_0 clientType, struct ContextCreateData_1_0 *createData, ContextHandle_1_0 *handle);
|
||||
using ContextDeleteFunction_1_0 = StatusCode(ML_STDCALL *)(const ContextHandle_1_0 handle);
|
||||
using GetParameterFunction_1_0 = StatusCode(ML_STDCALL *)(const ParameterType parameter, ValueType *type, TypedValue_1_0 *value);
|
||||
using CommandBufferGetFunction_1_0 = StatusCode(ML_STDCALL *)(const CommandBufferData_1_0 *data);
|
||||
using CommandBufferGetSizeFunction_1_0 = StatusCode(ML_STDCALL *)(const CommandBufferData_1_0 *data, CommandBufferSize_1_0 *size);
|
||||
using QueryCreateFunction_1_0 = StatusCode(ML_STDCALL *)(const QueryCreateData_1_0 *createData, QueryHandle_1_0 *handle);
|
||||
using QueryDeleteFunction_1_0 = StatusCode(ML_STDCALL *)(const QueryHandle_1_0 handle);
|
||||
using ConfigurationCreateFunction_1_0 = StatusCode(ML_STDCALL *)(const ConfigurationCreateData_1_0 *createData, ConfigurationHandle_1_0 *handle);
|
||||
using ConfigurationActivateFunction_1_0 = StatusCode(ML_STDCALL *)(const ConfigurationHandle_1_0 handle, const ConfigurationActivateData_1_0 *activateData);
|
||||
using ConfigurationDeactivateFunction_1_0 = StatusCode(ML_STDCALL *)(const ConfigurationHandle_1_0 handle);
|
||||
using ConfigurationDeleteFunction_1_0 = StatusCode(ML_STDCALL *)(const ConfigurationHandle_1_0 handle);
|
||||
using GetDataFunction_1_0 = StatusCode(ML_STDCALL *)(GetReportData_1_0 *data);
|
||||
|
||||
bool instrAutoSamplingStop(
|
||||
void **ppOAInterface);
|
||||
// Dummy interface.
|
||||
struct Interface_1_0 {
|
||||
GetParameterFunction_1_0 GetParameter;
|
||||
|
||||
bool instrCheckPmRegsCfg(
|
||||
InstrPmRegsCfg *pQueryPmRegsCfg,
|
||||
uint32_t *pLastPmRegsCfgHandle,
|
||||
const void *pASInterface);
|
||||
CommandBufferGetFunction_1_0 CommandBufferGet;
|
||||
CommandBufferGetSizeFunction_1_0 CommandBufferGetSize;
|
||||
|
||||
void instrGetPerfCountersQueryData(
|
||||
InstrEscCbData cbData,
|
||||
GTDI_QUERY *pData,
|
||||
HwPerfCounters *pLayout,
|
||||
uint64_t cpuRawTimestamp,
|
||||
void *pASInterface,
|
||||
InstrPmRegsCfg *pPmRegsCfg,
|
||||
bool useMiRPC,
|
||||
bool resetASData = false,
|
||||
const InstrAllowedContexts *pAllowedContexts = nullptr);
|
||||
QueryCreateFunction_1_0 QueryCreate;
|
||||
QueryDeleteFunction_1_0 QueryDelete;
|
||||
|
||||
bool instrEscGetPmRegsCfg(
|
||||
InstrEscCbData cbData,
|
||||
uint32_t cfgId,
|
||||
InstrPmRegsCfg *pCfg,
|
||||
InstrAutoSamplingMode *pAutoSampling);
|
||||
ConfigurationCreateFunction_1_0 ConfigurationCreate;
|
||||
ConfigurationActivateFunction_1_0 ConfigurationActivate;
|
||||
ConfigurationDeactivateFunction_1_0 ConfigurationDeactivate;
|
||||
ConfigurationDeleteFunction_1_0 ConfigurationDelete;
|
||||
|
||||
bool instrEscHwMetricsEnable(
|
||||
InstrEscCbData cbData,
|
||||
bool enable);
|
||||
|
||||
bool instrEscLoadPmRegsCfg(
|
||||
InstrEscCbData cbData,
|
||||
InstrPmRegsCfg *pCfg,
|
||||
bool hardwareAccess = 1);
|
||||
|
||||
bool instrEscSetPmRegsCfg(
|
||||
InstrEscCbData cbData,
|
||||
uint32_t count,
|
||||
uint32_t *pOffsets,
|
||||
uint32_t *pValues);
|
||||
|
||||
bool instrEscSendReadRegsCfg(
|
||||
InstrEscCbData cbData,
|
||||
uint32_t count,
|
||||
uint32_t *pOffsets,
|
||||
uint32_t *pBitSizes);
|
||||
|
||||
bool instrSetAvailable(bool enabled);
|
||||
|
||||
void instrEscVerifyEnable(
|
||||
InstrEscCbData cbData);
|
||||
|
||||
uint32_t instrSetPlatformInfo(
|
||||
uint32_t productId,
|
||||
void *featureTable);
|
||||
|
||||
} // namespace NEO
|
||||
GetDataFunction_1_0 GetData;
|
||||
};
|
||||
}; // namespace MetricsLibraryApi
|
||||
@@ -14,6 +14,8 @@ set(RUNTIME_SRCS_OS_INTERFACE_BASE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/debug_settings_manager.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_factory.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_factory.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/metrics_library.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/metrics_library.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_context.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_inc_base.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_interface.h
|
||||
|
||||
@@ -44,6 +44,7 @@ set(RUNTIME_SRCS_OS_INTERFACE_LINUX
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_library.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_library.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_memory_linux.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_metrics_library.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_thread_linux.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_thread_linux.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_time_linux.cpp
|
||||
|
||||
@@ -19,10 +19,6 @@ OSInterface::~OSInterface() {
|
||||
delete osInterfaceImpl;
|
||||
}
|
||||
|
||||
uint32_t OSInterface::getHwContextId() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool OSInterface::are64kbPagesEnabled() {
|
||||
return osEnabled64kbPages;
|
||||
}
|
||||
|
||||
51
runtime/os_interface/linux/os_metrics_library.cpp
Normal file
51
runtime/os_interface/linux/os_metrics_library.cpp
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/os_interface/metrics_library.h"
|
||||
|
||||
namespace NEO {
|
||||
//////////////////////////////////////////////////////
|
||||
// FUNCTION: MetricsLibrary::oaConfigurationActivate
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::oaConfigurationActivate(
|
||||
const ConfigurationHandle_1_0 &handle) {
|
||||
ConfigurationActivateData_1_0 data = {};
|
||||
data.Type = GpuConfigurationActivationType::Tbs;
|
||||
|
||||
return api->functions.ConfigurationActivate(
|
||||
handle,
|
||||
&data) == StatusCode::Success;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// FUNCTION: MetricsLibrary::oaConfigurationDeactivate
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::oaConfigurationDeactivate(
|
||||
const ConfigurationHandle_1_0 &handle) {
|
||||
return api->functions.ConfigurationDeactivate(
|
||||
handle) == StatusCode::Success;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// FUNCTION: MetricsLibrary::userConfigurationCreate
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::userConfigurationCreate(
|
||||
const ContextHandle_1_0 &context,
|
||||
ConfigurationHandle_1_0 &handle) {
|
||||
// Not supported on Linux.
|
||||
return true;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// FUNCTION: MetricsLibrary::userConfigurationDelete
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::userConfigurationDelete(
|
||||
const ConfigurationHandle_1_0 &handle) {
|
||||
// Not supported on Linux.
|
||||
return true;
|
||||
}
|
||||
} // namespace NEO
|
||||
@@ -7,66 +7,58 @@
|
||||
|
||||
#include "performance_counters_linux.h"
|
||||
|
||||
#include "runtime/device/device.h"
|
||||
#include "runtime/helpers/hw_helper.h"
|
||||
|
||||
namespace NEO {
|
||||
////////////////////////////////////////////////////
|
||||
// PerformanceCounters::create
|
||||
////////////////////////////////////////////////////
|
||||
std::unique_ptr<PerformanceCounters> PerformanceCounters::create(Device *device) {
|
||||
auto counter = std::make_unique<PerformanceCountersLinux>();
|
||||
auto gen = device->getHardwareInfo().platform.eRenderCoreFamily;
|
||||
auto &hwHelper = HwHelper::get(gen);
|
||||
UNRECOVERABLE_IF(counter == nullptr);
|
||||
|
||||
std::unique_ptr<PerformanceCounters> PerformanceCounters::create(OSTime *osTime) {
|
||||
return std::unique_ptr<PerformanceCounters>(new PerformanceCountersLinux(osTime));
|
||||
}
|
||||
PerformanceCountersLinux::PerformanceCountersLinux(OSTime *osTime) : PerformanceCounters(osTime) {
|
||||
mdLibHandle = nullptr;
|
||||
perfmonLoadConfigFunc = nullptr;
|
||||
counter->clientType.Gen = static_cast<MetricsLibraryApi::ClientGen>(hwHelper.getMetricsLibraryGenId());
|
||||
return counter;
|
||||
}
|
||||
|
||||
PerformanceCountersLinux::~PerformanceCountersLinux() {
|
||||
if (pAutoSamplingInterface) {
|
||||
autoSamplingStopFunc(&pAutoSamplingInterface);
|
||||
pAutoSamplingInterface = nullptr;
|
||||
available = false;
|
||||
}
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCountersLinux::enableCountersConfiguration
|
||||
//////////////////////////////////////////////////////
|
||||
bool PerformanceCountersLinux::enableCountersConfiguration() {
|
||||
// Release previous counters configuration so the user
|
||||
// can change configuration between kernels.
|
||||
releaseCountersConfiguration();
|
||||
|
||||
if (mdLibHandle) {
|
||||
dlcloseFunc(mdLibHandle);
|
||||
mdLibHandle = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void PerformanceCountersLinux::initialize(const HardwareInfo *hwInfo) {
|
||||
PerformanceCounters::initialize(hwInfo);
|
||||
mdLibHandle = dlopenFunc("libmd.so", RTLD_LAZY | RTLD_LOCAL);
|
||||
if (mdLibHandle) {
|
||||
perfmonLoadConfigFunc = reinterpret_cast<perfmonLoadConfig_t>(dlsymFunc(mdLibHandle, "drm_intel_perfmon_load_config"));
|
||||
}
|
||||
setPlatformInfoFunc(hwInfo->platform.eProductFamily, (void *)(&hwInfo->featureTable));
|
||||
}
|
||||
|
||||
void PerformanceCountersLinux::enableImpl() {
|
||||
if (mdLibHandle && perfmonLoadConfigFunc) {
|
||||
PerformanceCounters::enableImpl();
|
||||
}
|
||||
}
|
||||
|
||||
bool PerformanceCountersLinux::verifyPmRegsCfg(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending) {
|
||||
if (perfmonLoadConfigFunc == nullptr) {
|
||||
// Create oa configuration.
|
||||
if (!metricsLibrary->oaConfigurationCreate(
|
||||
context,
|
||||
oaConfiguration)) {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return false;
|
||||
}
|
||||
if (PerformanceCounters::verifyPmRegsCfg(pCfg, pLastPmRegsCfgHandle, pLastPmRegsCfgPending)) {
|
||||
return getPerfmonConfig(pCfg);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
bool PerformanceCountersLinux::getPerfmonConfig(InstrPmRegsCfg *pCfg) {
|
||||
unsigned int oaCfgHandle = pCfg->OaCounters.Handle;
|
||||
unsigned int gpCfgHandle = pCfg->GpCounters.Handle;
|
||||
int fd = osInterface->get()->getDrm()->getFileDescriptor();
|
||||
if (perfmonLoadConfigFunc(fd, nullptr, &oaCfgHandle, &gpCfgHandle) != 0) {
|
||||
return false;
|
||||
}
|
||||
if (pCfg->OaCounters.Handle != 0 && oaCfgHandle != pCfg->OaCounters.Handle) {
|
||||
return false;
|
||||
}
|
||||
if (pCfg->GpCounters.Handle != 0 && gpCfgHandle != pCfg->GpCounters.Handle) {
|
||||
|
||||
// Enable oa configuration.
|
||||
if (!metricsLibrary->oaConfigurationActivate(
|
||||
oaConfiguration)) {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCountersLinux::releaseCountersConfiguration
|
||||
//////////////////////////////////////////////////////
|
||||
void PerformanceCountersLinux::releaseCountersConfiguration() {
|
||||
// Oa configuration.
|
||||
if (oaConfiguration.IsValid()) {
|
||||
metricsLibrary->oaConfigurationDeactivate(oaConfiguration);
|
||||
metricsLibrary->oaConfigurationDelete(oaConfiguration);
|
||||
oaConfiguration.data = nullptr;
|
||||
}
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -8,35 +8,17 @@
|
||||
#pragma once
|
||||
#include "runtime/os_interface/performance_counters.h"
|
||||
|
||||
#include "os_interface.h"
|
||||
|
||||
#include <dlfcn.h>
|
||||
|
||||
typedef struct _drm_intel_context drm_intel_context;
|
||||
|
||||
namespace NEO {
|
||||
|
||||
class PerformanceCountersLinux : virtual public PerformanceCounters {
|
||||
public:
|
||||
PerformanceCountersLinux(OSTime *osTime);
|
||||
~PerformanceCountersLinux() override;
|
||||
void initialize(const HardwareInfo *hwInfo) override;
|
||||
void enableImpl() override;
|
||||
PerformanceCountersLinux() = default;
|
||||
~PerformanceCountersLinux() override = default;
|
||||
|
||||
protected:
|
||||
virtual bool getPerfmonConfig(InstrPmRegsCfg *pCfg);
|
||||
bool verifyPmRegsCfg(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending) override;
|
||||
|
||||
typedef int (*perfmonLoadConfig_t)(int fd, drm_intel_context *ctx, uint32_t *oaCfgId, uint32_t *gpCfgId);
|
||||
typedef void *(*dlopenFunc_t)(const char *, int);
|
||||
typedef void *(*dlsymFunc_t)(void *, const char *);
|
||||
|
||||
void *mdLibHandle;
|
||||
|
||||
perfmonLoadConfig_t perfmonLoadConfigFunc;
|
||||
dlopenFunc_t dlopenFunc = dlopen;
|
||||
dlsymFunc_t dlsymFunc = dlsym;
|
||||
decltype(&dlclose) dlcloseFunc = dlclose;
|
||||
decltype(&instrSetPlatformInfo) setPlatformInfoFunc = instrSetPlatformInfo;
|
||||
/////////////////////////////////////////////////////
|
||||
// Gpu oa/mmio configuration.
|
||||
/////////////////////////////////////////////////////
|
||||
bool enableCountersConfiguration() override;
|
||||
void releaseCountersConfiguration() override;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
188
runtime/os_interface/metrics_library.cpp
Normal file
188
runtime/os_interface/metrics_library.cpp
Normal file
@@ -0,0 +1,188 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/os_interface/metrics_library.h"
|
||||
|
||||
#include "runtime/helpers/hw_helper.h"
|
||||
#include "runtime/os_interface/os_inc_base.h"
|
||||
|
||||
namespace NEO {
|
||||
///////////////////////////////////////////////////////
|
||||
// FUNCTION: MetricsLibrary::MetricsLibrary
|
||||
///////////////////////////////////////////////////////
|
||||
MetricsLibrary::MetricsLibrary() {
|
||||
api = std::make_unique<MetricsLibraryInterface>();
|
||||
osLibrary.reset(OsLibrary::load(Os::metricsLibraryDllName));
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// FUNCTION: MetricsLibrary::open
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::open() {
|
||||
|
||||
UNRECOVERABLE_IF(osLibrary.get() == nullptr);
|
||||
|
||||
if (osLibrary->isLoaded()) {
|
||||
api->contextCreate = reinterpret_cast<ContextCreateFunction_1_0>(osLibrary->getProcAddress(METRICS_LIBRARY_CONTEXT_CREATE_1_0));
|
||||
api->contextDelete = reinterpret_cast<ContextDeleteFunction_1_0>(osLibrary->getProcAddress(METRICS_LIBRARY_CONTEXT_DELETE_1_0));
|
||||
} else {
|
||||
api->contextCreate = nullptr;
|
||||
api->contextDelete = nullptr;
|
||||
}
|
||||
|
||||
if (!api->contextCreate) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!api->contextDelete) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// MetricsLibrary::createContext
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::contextCreate(
|
||||
const ClientType_1_0 &clientType,
|
||||
ClientData_1_0 &clientData,
|
||||
ContextCreateData_1_0 &createData,
|
||||
ContextHandle_1_0 &handle) {
|
||||
|
||||
createData.Api = &api->functions;
|
||||
createData.ClientCallbacks = &api->callbacks;
|
||||
createData.ClientData = &clientData;
|
||||
|
||||
return api->contextCreate(
|
||||
clientType,
|
||||
&createData,
|
||||
&handle) == StatusCode::Success;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// MetricsLibrary::contextDelete
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::contextDelete(
|
||||
const ContextHandle_1_0 &handle) {
|
||||
return api->contextDelete(handle) == StatusCode::Success;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// MetricsLibrary::hwCountersCreate
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::hwCountersCreate(
|
||||
const ContextHandle_1_0 &context,
|
||||
const uint32_t slots,
|
||||
const ConfigurationHandle_1_0 user,
|
||||
QueryHandle_1_0 &query) {
|
||||
QueryCreateData_1_0 data = {};
|
||||
data.HandleContext = context;
|
||||
data.Type = ObjectType::QueryHwCounters;
|
||||
data.Slots = slots;
|
||||
|
||||
return api->functions.QueryCreate(
|
||||
&data,
|
||||
&query) == StatusCode::Success;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// MetricsLibrary::hwCountersDelete
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::hwCountersDelete(
|
||||
const QueryHandle_1_0 &query) {
|
||||
return api->functions.QueryDelete(query) == StatusCode::Success;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// MetricsLibrary::hwCountersGetReport
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::hwCountersGetReport(
|
||||
const QueryHandle_1_0 &handle,
|
||||
const uint32_t slot,
|
||||
const uint32_t slotsCount,
|
||||
const uint32_t dataSize,
|
||||
void *data) {
|
||||
GetReportData_1_0 report = {};
|
||||
report.Type = ObjectType::QueryHwCounters;
|
||||
report.Query.Handle = handle;
|
||||
report.Query.Slot = slot;
|
||||
report.Query.SlotsCount = slotsCount;
|
||||
report.Query.Data = data;
|
||||
report.Query.DataSize = dataSize;
|
||||
|
||||
return api->functions.GetData(&report) == StatusCode::Success;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// MetricsLibrary::hwCountersGetApiReportSize
|
||||
//////////////////////////////////////////////////////
|
||||
uint32_t MetricsLibrary::hwCountersGetApiReportSize() {
|
||||
ValueType type = ValueType::Uint32;
|
||||
TypedValue_1_0 value = {};
|
||||
|
||||
return api->functions.GetParameter(ParameterType::QueryHwCountersReportApiSize, &type, &value) == StatusCode::Success
|
||||
? value.ValueUInt32
|
||||
: 0;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// MetricsLibrary::hwCountersGetGpuReportSize
|
||||
//////////////////////////////////////////////////////
|
||||
uint32_t MetricsLibrary::hwCountersGetGpuReportSize() {
|
||||
ValueType type = ValueType::Uint32;
|
||||
TypedValue_1_0 value = {};
|
||||
|
||||
return api->functions.GetParameter(ParameterType::QueryHwCountersReportGpuSize, &type, &value) == StatusCode::Success
|
||||
? value.ValueUInt32
|
||||
: 0;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// MetricsLibrary::commandBufferGet
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::commandBufferGet(
|
||||
CommandBufferData_1_0 &data) {
|
||||
return api->functions.CommandBufferGet(
|
||||
&data) == StatusCode::Success;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// MetricsLibrary::commandBufferGetSize
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::commandBufferGetSize(
|
||||
const CommandBufferData_1_0 &commandBufferData,
|
||||
CommandBufferSize_1_0 &commandBufferSize) {
|
||||
return api->functions.CommandBufferGetSize(
|
||||
&commandBufferData,
|
||||
&commandBufferSize) == StatusCode::Success;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// MetricsLibrary::oaConfigurationCreate
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::oaConfigurationCreate(
|
||||
const ContextHandle_1_0 &context,
|
||||
ConfigurationHandle_1_0 &handle) {
|
||||
ConfigurationCreateData_1_0 data = {};
|
||||
data.HandleContext = context;
|
||||
data.Type = ObjectType::ConfigurationHwCountersOa;
|
||||
|
||||
return api->functions.ConfigurationCreate(
|
||||
&data,
|
||||
&handle) == StatusCode::Success;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// MetricsLibrary::oaConfigurationDelete
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::oaConfigurationDelete(
|
||||
const ConfigurationHandle_1_0 &handle) {
|
||||
|
||||
return api->functions.ConfigurationDelete(handle) == StatusCode::Success;
|
||||
}
|
||||
} // namespace NEO
|
||||
89
runtime/os_interface/metrics_library.h
Normal file
89
runtime/os_interface/metrics_library.h
Normal file
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "runtime/os_interface/os_library.h"
|
||||
|
||||
#include "instrumentation.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
using MetricsLibraryApi::ClientApi;
|
||||
using MetricsLibraryApi::ClientCallbacks_1_0;
|
||||
using MetricsLibraryApi::ClientData_1_0;
|
||||
using MetricsLibraryApi::ClientGen;
|
||||
using MetricsLibraryApi::ClientType_1_0;
|
||||
using MetricsLibraryApi::CommandBufferData_1_0;
|
||||
using MetricsLibraryApi::CommandBufferSize_1_0;
|
||||
using MetricsLibraryApi::ConfigurationActivateData_1_0;
|
||||
using MetricsLibraryApi::ConfigurationCreateData_1_0;
|
||||
using MetricsLibraryApi::ConfigurationHandle_1_0;
|
||||
using MetricsLibraryApi::ContextCreateData_1_0;
|
||||
using MetricsLibraryApi::ContextCreateFunction_1_0;
|
||||
using MetricsLibraryApi::ContextDeleteFunction_1_0;
|
||||
using MetricsLibraryApi::ContextHandle_1_0;
|
||||
using MetricsLibraryApi::GetReportData_1_0;
|
||||
using MetricsLibraryApi::GpuConfigurationActivationType;
|
||||
using MetricsLibraryApi::GpuMemory_1_0;
|
||||
using MetricsLibraryApi::Interface_1_0;
|
||||
using MetricsLibraryApi::ObjectType;
|
||||
using MetricsLibraryApi::ParameterType;
|
||||
using MetricsLibraryApi::QueryCreateData_1_0;
|
||||
using MetricsLibraryApi::QueryHandle_1_0;
|
||||
using MetricsLibraryApi::StatusCode;
|
||||
using MetricsLibraryApi::TypedValue_1_0;
|
||||
using MetricsLibraryApi::ValueType;
|
||||
|
||||
class MetricsLibraryInterface {
|
||||
public:
|
||||
ContextCreateFunction_1_0 contextCreate = nullptr;
|
||||
ContextDeleteFunction_1_0 contextDelete = nullptr;
|
||||
Interface_1_0 functions = {};
|
||||
ClientCallbacks_1_0 callbacks = {};
|
||||
};
|
||||
|
||||
class MetricsLibrary {
|
||||
public:
|
||||
MetricsLibrary();
|
||||
MOCKABLE_VIRTUAL ~MetricsLibrary(){};
|
||||
|
||||
// Library open function.
|
||||
MOCKABLE_VIRTUAL bool open();
|
||||
|
||||
// Context create / destroy functions.
|
||||
MOCKABLE_VIRTUAL bool contextCreate(const ClientType_1_0 &client, ClientData_1_0 &clientData, ContextCreateData_1_0 &createData, ContextHandle_1_0 &handle);
|
||||
MOCKABLE_VIRTUAL bool contextDelete(const ContextHandle_1_0 &handle);
|
||||
|
||||
// HwCounters functions.
|
||||
MOCKABLE_VIRTUAL bool hwCountersCreate(const ContextHandle_1_0 &context, const uint32_t slots, const ConfigurationHandle_1_0 mmio, QueryHandle_1_0 &handle);
|
||||
MOCKABLE_VIRTUAL bool hwCountersDelete(const QueryHandle_1_0 &handle);
|
||||
MOCKABLE_VIRTUAL bool hwCountersGetReport(const QueryHandle_1_0 &handle, const uint32_t slot, const uint32_t slotsCount, const uint32_t dataSize, void *data);
|
||||
MOCKABLE_VIRTUAL uint32_t hwCountersGetApiReportSize();
|
||||
MOCKABLE_VIRTUAL uint32_t hwCountersGetGpuReportSize();
|
||||
|
||||
// Oa configuration functions.
|
||||
MOCKABLE_VIRTUAL bool oaConfigurationCreate(const ContextHandle_1_0 &context, ConfigurationHandle_1_0 &handle);
|
||||
MOCKABLE_VIRTUAL bool oaConfigurationDelete(const ConfigurationHandle_1_0 &handle);
|
||||
MOCKABLE_VIRTUAL bool oaConfigurationActivate(const ConfigurationHandle_1_0 &handle);
|
||||
MOCKABLE_VIRTUAL bool oaConfigurationDeactivate(const ConfigurationHandle_1_0 &handle);
|
||||
|
||||
// User mmio configuration functions.
|
||||
MOCKABLE_VIRTUAL bool userConfigurationCreate(const ContextHandle_1_0 &context, ConfigurationHandle_1_0 &handle);
|
||||
MOCKABLE_VIRTUAL bool userConfigurationDelete(const ConfigurationHandle_1_0 &handle);
|
||||
|
||||
// Command buffer functions.
|
||||
MOCKABLE_VIRTUAL bool commandBufferGet(CommandBufferData_1_0 &data);
|
||||
MOCKABLE_VIRTUAL bool commandBufferGetSize(const CommandBufferData_1_0 &commandBufferData, CommandBufferSize_1_0 &commandBufferSize);
|
||||
|
||||
public:
|
||||
std::unique_ptr<OsLibrary> osLibrary;
|
||||
std::unique_ptr<MetricsLibraryInterface> api;
|
||||
};
|
||||
} // namespace NEO
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2018 Intel Corporation
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -18,4 +18,7 @@ extern const char *testDllName;
|
||||
extern const char *fileSeparator;
|
||||
// Pci Path
|
||||
extern const char *sysFsPciPath;
|
||||
|
||||
// Os specific Metrics Library name
|
||||
extern const char *metricsLibraryDllName;
|
||||
}; // namespace Os
|
||||
|
||||
@@ -21,7 +21,6 @@ class OSInterface {
|
||||
OSInterfaceImpl *get() const {
|
||||
return osInterfaceImpl;
|
||||
};
|
||||
unsigned int getHwContextId() const;
|
||||
static bool osEnabled64kbPages;
|
||||
static bool osEnableLocalMemory;
|
||||
static bool are64kbPagesEnabled();
|
||||
|
||||
@@ -7,175 +7,240 @@
|
||||
|
||||
#include "runtime/os_interface/performance_counters.h"
|
||||
|
||||
#include "runtime/helpers/debug_helpers.h"
|
||||
#include "runtime/os_interface/os_interface.h"
|
||||
#include "runtime/os_interface/os_time.h"
|
||||
#include "runtime/utilities/tag_allocator.h"
|
||||
|
||||
#include "CL/cl.h"
|
||||
using namespace MetricsLibraryApi;
|
||||
|
||||
namespace NEO {
|
||||
decltype(&instrGetPerfCountersQueryData) getPerfCountersQueryDataFactory[IGFX_MAX_CORE] = {
|
||||
nullptr,
|
||||
};
|
||||
size_t perfCountersQuerySize[IGFX_MAX_CORE] = {
|
||||
0,
|
||||
};
|
||||
|
||||
PerformanceCounters::PerformanceCounters(OSTime *osTime) {
|
||||
this->osTime = osTime;
|
||||
DEBUG_BREAK_IF(osTime == nullptr);
|
||||
gfxFamily = IGFX_UNKNOWN_CORE;
|
||||
cbData = {
|
||||
0,
|
||||
};
|
||||
this->osInterface = osTime->getOSInterface();
|
||||
hwMetricsEnabled = false;
|
||||
useMIRPC = false;
|
||||
pAutoSamplingInterface = nullptr;
|
||||
cpuRawTimestamp = 0;
|
||||
refCounter = 0;
|
||||
available = false;
|
||||
reportId = 0;
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCounters constructor.
|
||||
//////////////////////////////////////////////////////
|
||||
PerformanceCounters::PerformanceCounters() {
|
||||
metricsLibrary = std::make_unique<MetricsLibrary>();
|
||||
UNRECOVERABLE_IF(metricsLibrary == nullptr);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCounters::getReferenceNumber
|
||||
//////////////////////////////////////////////////////
|
||||
uint32_t PerformanceCounters::getReferenceNumber() {
|
||||
std::lock_guard<std::mutex> lockMutex(mutex);
|
||||
return referenceCounter;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCounters::isAvailable
|
||||
//////////////////////////////////////////////////////
|
||||
bool PerformanceCounters::isAvailable() {
|
||||
return available;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCounters::enable
|
||||
//////////////////////////////////////////////////////
|
||||
void PerformanceCounters::enable() {
|
||||
mutex.lock();
|
||||
std::lock_guard<std::mutex> lg(mutex, std::adopt_lock);
|
||||
if (refCounter == 0) {
|
||||
enableImpl();
|
||||
std::lock_guard<std::mutex> lockMutex(mutex);
|
||||
|
||||
if (referenceCounter == 0) {
|
||||
available = openMetricsLibrary();
|
||||
}
|
||||
refCounter++;
|
||||
|
||||
referenceCounter++;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCounters::shutdown
|
||||
//////////////////////////////////////////////////////
|
||||
void PerformanceCounters::shutdown() {
|
||||
mutex.lock();
|
||||
std::lock_guard<std::mutex> lg(mutex, std::adopt_lock);
|
||||
if (refCounter >= 1) {
|
||||
if (refCounter == 1) {
|
||||
shutdownImpl();
|
||||
std::lock_guard<std::mutex> lockMutex(mutex);
|
||||
|
||||
if (referenceCounter >= 1) {
|
||||
if (referenceCounter == 1) {
|
||||
available = false;
|
||||
closeMetricsLibrary();
|
||||
}
|
||||
refCounter--;
|
||||
referenceCounter--;
|
||||
}
|
||||
}
|
||||
|
||||
void PerformanceCounters::initialize(const HardwareInfo *hwInfo) {
|
||||
useMIRPC = !(hwInfo->workaroundTable.waDoNotUseMIReportPerfCount);
|
||||
gfxFamily = hwInfo->platform.eRenderCoreFamily;
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCounters::getMetricsLibraryInterface
|
||||
//////////////////////////////////////////////////////
|
||||
// Returns a non-owning pointer to the Metrics Library wrapper;
// ownership stays with this object.
MetricsLibrary *PerformanceCounters::getMetricsLibraryInterface() {
    MetricsLibrary *const library = metricsLibrary.get();
    return library;
}
|
||||
|
||||
if (getPerfCountersQueryDataFactory[gfxFamily] != nullptr) {
|
||||
getPerfCountersQueryDataFunc = getPerfCountersQueryDataFactory[gfxFamily];
|
||||
} else {
|
||||
perfCountersQuerySize[gfxFamily] = sizeof(GTDI_QUERY);
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCounters::setMetricsLibraryInterface
|
||||
//////////////////////////////////////////////////////
|
||||
void PerformanceCounters::setMetricsLibraryInterface(std::unique_ptr<MetricsLibrary> newMetricsLibrary) {
|
||||
metricsLibrary = std::move(newMetricsLibrary);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCounters::getMetricsLibraryContext
|
||||
//////////////////////////////////////////////////////
|
||||
// Returns a copy of the Metrics Library context handle stored in this object.
ContextHandle_1_0 PerformanceCounters::getMetricsLibraryContext() {
    const ContextHandle_1_0 currentContext = context;
    return currentContext;
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCounters::openMetricsLibrary
|
||||
//////////////////////////////////////////////////////
|
||||
bool PerformanceCounters::openMetricsLibrary() {
|
||||
|
||||
// Open metrics library.
|
||||
bool result = metricsLibrary->open();
|
||||
DEBUG_BREAK_IF(!result);
|
||||
|
||||
// Create metrics library context.
|
||||
if (result) {
|
||||
result = metricsLibrary->contextCreate(
|
||||
clientType,
|
||||
clientData,
|
||||
contextData,
|
||||
context);
|
||||
|
||||
// Validate gpu report size.
|
||||
DEBUG_BREAK_IF(!metricsLibrary->hwCountersGetGpuReportSize());
|
||||
}
|
||||
|
||||
// Error handling.
|
||||
if (!result) {
|
||||
closeMetricsLibrary();
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCounters::closeMetricsLibrary
|
||||
//////////////////////////////////////////////////////
|
||||
void PerformanceCounters::closeMetricsLibrary() {
|
||||
// Destroy oa/user mmio configuration.
|
||||
releaseCountersConfiguration();
|
||||
|
||||
// Destroy hw counters query.
|
||||
if (query.IsValid()) {
|
||||
metricsLibrary->hwCountersDelete(query);
|
||||
}
|
||||
|
||||
// Destroy metrics library context.
|
||||
if (context.IsValid()) {
|
||||
metricsLibrary->contextDelete(context);
|
||||
}
|
||||
}
|
||||
void PerformanceCounters::enableImpl() {
|
||||
hwMetricsEnabled = hwMetricsEnableFunc(cbData, true);
|
||||
|
||||
if (!pAutoSamplingInterface && hwMetricsEnabled) {
|
||||
autoSamplingStartFunc(cbData, &pAutoSamplingInterface);
|
||||
if (pAutoSamplingInterface) {
|
||||
available = true;
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCounters::getQueryHandle
|
||||
//////////////////////////////////////////////////////
|
||||
// Returns the hw counters query handle, creating it on first use.
// The result of the creation call is not inspected here; a debug break fires
// if the handle is still invalid afterwards.
QueryHandle_1_0 PerformanceCounters::getQueryHandle() {
    const bool needsCreation = !query.IsValid();

    if (needsCreation) {
        metricsLibrary->hwCountersCreate(context, 1, userConfiguration, query);
    }

    DEBUG_BREAK_IF(!query.IsValid());
    return query;
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCounters::getGpuCommandsSize
|
||||
//////////////////////////////////////////////////////
|
||||
uint32_t PerformanceCounters::getGpuCommandsSize(
|
||||
const bool begin) {
|
||||
CommandBufferData_1_0 bufferData = {};
|
||||
CommandBufferSize_1_0 bufferSize = {};
|
||||
|
||||
if (begin) {
|
||||
// Load currently activated (through metrics discovery) oa/user mmio configuration and use it.
|
||||
// It will allow to change counters configuration between subsequent clEnqueueNDCommandRange calls.
|
||||
if (!enableCountersConfiguration()) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
bufferData.HandleContext = context;
|
||||
bufferData.Type = GpuCommandBufferType::Render;
|
||||
bufferData.CommandsType = ObjectType::QueryHwCounters;
|
||||
|
||||
bufferData.QueryHwCounters.Begin = begin;
|
||||
bufferData.QueryHwCounters.Handle = getQueryHandle();
|
||||
bufferData.QueryHwCounters.HandleUserConfiguration = userConfiguration;
|
||||
|
||||
return metricsLibrary->commandBufferGetSize(bufferData, bufferSize)
|
||||
? bufferSize.GpuMemorySize
|
||||
: 0;
|
||||
}
|
||||
void PerformanceCounters::shutdownImpl() {
|
||||
if (hwMetricsEnabled) {
|
||||
hwMetricsEnableFunc(cbData, false);
|
||||
hwMetricsEnabled = false;
|
||||
}
|
||||
if (pAutoSamplingInterface) {
|
||||
autoSamplingStopFunc(&pAutoSamplingInterface);
|
||||
pAutoSamplingInterface = nullptr;
|
||||
available = false;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCounters::getGpuCommands
|
||||
//////////////////////////////////////////////////////
|
||||
// Asks the Metrics Library to write the "query hw counters" commands into a
// caller-provided buffer.
//
// performanceCounters - tag node providing the cpu/gpu addresses of the
//                       allocation used by the query.
// begin               - true for the begin-query commands, false for end.
// bufferSize          - size of pBuffer, passed through to the library.
// pBuffer             - destination buffer, passed through to the library.
//
// Returns the result of MetricsLibrary::commandBufferGet().
bool PerformanceCounters::getGpuCommands(
    TagNode<HwPerfCounter> &performanceCounters,
    const bool begin,
    const uint32_t bufferSize,
    void *pBuffer) {
    CommandBufferData_1_0 commands = {};

    // Target context, command stream type and destination buffer.
    commands.HandleContext = context;
    commands.Type = GpuCommandBufferType::Render;
    commands.CommandsType = ObjectType::QueryHwCounters;
    commands.Data = pBuffer;
    commands.Size = bufferSize;

    // Allocation backing the query, taken from the tag node.
    auto &allocation = commands.Allocation;
    allocation.CpuAddress = reinterpret_cast<uint8_t *>(performanceCounters.tagForCpuAccess);
    allocation.GpuAddress = performanceCounters.getGpuAddress();

    // Query-specific parameters.
    auto &queryData = commands.QueryHwCounters;
    queryData.Begin = begin;
    queryData.Handle = getQueryHandle();
    queryData.HandleUserConfiguration = userConfiguration;

    const bool commandsWritten = metricsLibrary->commandBufferGet(commands);
    return commandsWritten;
}
|
||||
void PerformanceCounters::setCpuTimestamp() {
|
||||
cpuRawTimestamp = osTime->getCpuRawTimestamp();
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCounters::getApiReportSize
|
||||
//////////////////////////////////////////////////////
|
||||
uint32_t PerformanceCounters::getApiReportSize() {
|
||||
return metricsLibrary->hwCountersGetApiReportSize();
|
||||
}
|
||||
InstrPmRegsCfg *PerformanceCounters::getPmRegsCfg(uint32_t configuration) {
|
||||
if (!hwMetricsEnabled) {
|
||||
return nullptr;
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCounters::getGpuReportSize
|
||||
//////////////////////////////////////////////////////
|
||||
uint32_t PerformanceCounters::getGpuReportSize() {
|
||||
return metricsLibrary->hwCountersGetGpuReportSize();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCounters::getApiReport
|
||||
//////////////////////////////////////////////////////
|
||||
bool PerformanceCounters::getApiReport(const size_t inputParamSize, void *pInputParam, size_t *pOutputParamSize, bool isEventComplete) {
|
||||
const uint32_t outputSize = metricsLibrary->hwCountersGetApiReportSize();
|
||||
|
||||
if (pOutputParamSize) {
|
||||
*pOutputParamSize = outputSize;
|
||||
}
|
||||
|
||||
switch (configuration) {
|
||||
case GTDI_CONFIGURATION_SET_DYNAMIC:
|
||||
case GTDI_CONFIGURATION_SET_1:
|
||||
case GTDI_CONFIGURATION_SET_2:
|
||||
case GTDI_CONFIGURATION_SET_3:
|
||||
break;
|
||||
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
InstrPmRegsCfg *pPmRegsCfg = new InstrPmRegsCfg();
|
||||
pPmRegsCfg->OaCounters.Handle = INSTR_PM_REGS_CFG_INVALID;
|
||||
|
||||
mutex.lock();
|
||||
std::lock_guard<std::mutex> lg(mutex, std::adopt_lock);
|
||||
|
||||
if (getPmRegsCfgFunc(cbData, configuration, pPmRegsCfg, nullptr)) {
|
||||
return pPmRegsCfg;
|
||||
}
|
||||
delete pPmRegsCfg;
|
||||
return nullptr;
|
||||
}
|
||||
bool PerformanceCounters::verifyPmRegsCfg(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending) {
|
||||
if (pCfg == nullptr || pLastPmRegsCfgHandle == nullptr || pLastPmRegsCfgPending == nullptr) {
|
||||
return false;
|
||||
}
|
||||
if (checkPmRegsCfgFunc(pCfg, pLastPmRegsCfgHandle, pAutoSamplingInterface)) {
|
||||
if (loadPmRegsCfgFunc(cbData, pCfg, 1)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
bool PerformanceCounters::sendPmRegsCfgCommands(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending) {
|
||||
if (verifyPmRegsCfg(pCfg, pLastPmRegsCfgHandle, pLastPmRegsCfgPending)) {
|
||||
*pLastPmRegsCfgPending = true;
|
||||
if (pInputParam == nullptr && inputParamSize == 0 && pOutputParamSize) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
bool PerformanceCounters::processEventReport(size_t inputParamSize, void *inputParam, size_t *outputParamSize, HwPerfCounter *pPrivateData, InstrPmRegsCfg *countersConfiguration, bool isEventComplete) {
|
||||
size_t outputSize = perfCountersQuerySize[gfxFamily];
|
||||
if (outputParamSize) {
|
||||
*outputParamSize = outputSize;
|
||||
}
|
||||
if (inputParam == nullptr && inputParamSize == 0 && outputParamSize) {
|
||||
return true;
|
||||
}
|
||||
if (inputParam == nullptr || isEventComplete == false) {
|
||||
|
||||
if (pInputParam == nullptr || isEventComplete == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (inputParamSize < outputSize) {
|
||||
return false;
|
||||
}
|
||||
GTDI_QUERY *pClientData = static_cast<GTDI_QUERY *>(inputParam);
|
||||
getPerfCountersQueryDataFunc(cbData, pClientData, &pPrivateData->HWPerfCounters,
|
||||
cpuRawTimestamp, pAutoSamplingInterface, countersConfiguration, useMIRPC, true, nullptr);
|
||||
return true;
|
||||
}
|
||||
|
||||
int PerformanceCounters::sendPerfConfiguration(uint32_t count, uint32_t *pOffsets, uint32_t *pValues) {
|
||||
bool ret = false;
|
||||
|
||||
if (count == 0 || pOffsets == NULL || pValues == NULL) {
|
||||
return CL_INVALID_VALUE;
|
||||
}
|
||||
|
||||
mutex.lock();
|
||||
std::lock_guard<std::mutex> lg(mutex, std::adopt_lock);
|
||||
if (pOffsets[0] != INSTR_READ_REGS_CFG_TAG) {
|
||||
ret = setPmRegsCfgFunc(cbData, count, pOffsets, pValues);
|
||||
} else if (count > 1) {
|
||||
ret = sendReadRegsCfgFunc(cbData, count - 1, pOffsets + 1, pValues + 1);
|
||||
}
|
||||
|
||||
return ret ? CL_SUCCESS : CL_PROFILING_INFO_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
uint32_t PerformanceCounters::getCurrentReportId() {
|
||||
return (osInterface->getHwContextId() << 12) | getReportId();
|
||||
return metricsLibrary->hwCountersGetReport(query, 0, 1, outputSize, pInputParam);
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -7,71 +7,105 @@
|
||||
|
||||
#pragma once
|
||||
#include "runtime/event/perf_counter.h"
|
||||
#include "runtime/helpers/hw_info.h"
|
||||
#include "runtime/os_interface/metrics_library.h"
|
||||
|
||||
#include "CL/cl.h"
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
namespace NEO {
|
||||
struct HardwareInfo;
|
||||
class OSInterface;
|
||||
class OSTime;
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Forward declaration.
|
||||
//////////////////////////////////////////////////////
|
||||
template <typename Node>
|
||||
struct TagNode;
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Performance counters implementation.
|
||||
//////////////////////////////////////////////////////
|
||||
class PerformanceCounters {
|
||||
public:
|
||||
static std::unique_ptr<PerformanceCounters> create(OSTime *osTime);
|
||||
//////////////////////////////////////////////////////
|
||||
// Constructor/destructor.
|
||||
//////////////////////////////////////////////////////
|
||||
PerformanceCounters();
|
||||
virtual ~PerformanceCounters() = default;
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Performance counters creation.
|
||||
//////////////////////////////////////////////////////
|
||||
static std::unique_ptr<PerformanceCounters> create(class Device *device);
|
||||
void enable();
|
||||
void shutdown();
|
||||
virtual void initialize(const HardwareInfo *hwInfo);
|
||||
InstrPmRegsCfg *getPmRegsCfg(uint32_t configuration);
|
||||
bool sendPmRegsCfgCommands(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending);
|
||||
void setCpuTimestamp();
|
||||
bool processEventReport(size_t pClientDataSize, void *pClientData, size_t *outputSize, HwPerfCounter *pPrivateData, InstrPmRegsCfg *countersConfiguration, bool isEventComplete);
|
||||
int sendPerfConfiguration(uint32_t count, uint32_t *pOffsets, uint32_t *pValues);
|
||||
uint32_t getCurrentReportId();
|
||||
bool isAvailable();
|
||||
uint32_t getReferenceNumber();
|
||||
|
||||
uint32_t getPerfCountersReferenceNumber() {
|
||||
mutex.lock();
|
||||
std::lock_guard<std::mutex> lg(mutex, std::adopt_lock);
|
||||
/////////////////////////////////////////////////////
|
||||
// Gpu oa/mmio configuration.
|
||||
/////////////////////////////////////////////////////
|
||||
virtual bool enableCountersConfiguration() = 0;
|
||||
virtual void releaseCountersConfiguration() = 0;
|
||||
|
||||
return refCounter;
|
||||
}
|
||||
//////////////////////////////////////////////////////
|
||||
// Gpu commands.
|
||||
//////////////////////////////////////////////////////
|
||||
uint32_t getGpuCommandsSize(const bool begin);
|
||||
bool getGpuCommands(TagNode<HwPerfCounter> &performanceCounters, const bool begin, const uint32_t bufferSize, void *pBuffer);
|
||||
|
||||
bool isAvailable() {
|
||||
return available;
|
||||
}
|
||||
/////////////////////////////////////////////////////
|
||||
// Gpu/Api reports.
|
||||
/////////////////////////////////////////////////////
|
||||
uint32_t getApiReportSize();
|
||||
uint32_t getGpuReportSize();
|
||||
bool getApiReport(const size_t inputParamSize, void *pClientData, size_t *pOutputSize, bool isEventComplete);
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Metrics Library interface.
|
||||
/////////////////////////////////////////////////////
|
||||
MetricsLibrary *getMetricsLibraryInterface();
|
||||
void setMetricsLibraryInterface(std::unique_ptr<MetricsLibrary> newMetricsLibrary);
|
||||
bool openMetricsLibrary();
|
||||
void closeMetricsLibrary();
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Metrics Library context/query handles.
|
||||
/////////////////////////////////////////////////////
|
||||
ContextHandle_1_0 getMetricsLibraryContext();
|
||||
QueryHandle_1_0 getQueryHandle();
|
||||
|
||||
protected:
|
||||
PerformanceCounters(OSTime *osTime);
|
||||
virtual bool verifyPmRegsCfg(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending);
|
||||
virtual void enableImpl();
|
||||
void shutdownImpl();
|
||||
MOCKABLE_VIRTUAL uint32_t getReportId() {
|
||||
return ++reportId & 0xFFF;
|
||||
}
|
||||
GFXCORE_FAMILY gfxFamily;
|
||||
InstrEscCbData cbData;
|
||||
OSInterface *osInterface;
|
||||
OSTime *osTime;
|
||||
bool hwMetricsEnabled;
|
||||
bool useMIRPC;
|
||||
void *pAutoSamplingInterface;
|
||||
uint64_t cpuRawTimestamp;
|
||||
/////////////////////////////////////////////////////
|
||||
// Common members.
|
||||
/////////////////////////////////////////////////////
|
||||
std::mutex mutex;
|
||||
uint32_t refCounter;
|
||||
bool available;
|
||||
uint32_t reportId;
|
||||
decltype(&instrAutoSamplingStart) autoSamplingStartFunc = instrAutoSamplingStart;
|
||||
decltype(&instrAutoSamplingStop) autoSamplingStopFunc = instrAutoSamplingStop;
|
||||
decltype(&instrCheckPmRegsCfg) checkPmRegsCfgFunc = instrCheckPmRegsCfg;
|
||||
decltype(&instrGetPerfCountersQueryData) getPerfCountersQueryDataFunc = instrGetPerfCountersQueryData;
|
||||
decltype(&instrEscGetPmRegsCfg) getPmRegsCfgFunc = instrEscGetPmRegsCfg;
|
||||
decltype(&instrEscHwMetricsEnable) hwMetricsEnableFunc = instrEscHwMetricsEnable;
|
||||
decltype(&instrEscLoadPmRegsCfg) loadPmRegsCfgFunc = instrEscLoadPmRegsCfg;
|
||||
decltype(&instrEscSetPmRegsCfg) setPmRegsCfgFunc = instrEscSetPmRegsCfg;
|
||||
decltype(&instrEscSendReadRegsCfg) sendReadRegsCfgFunc = instrEscSendReadRegsCfg;
|
||||
uint32_t referenceCounter = 0;
|
||||
bool available = false;
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Metrics Library interface.
|
||||
/////////////////////////////////////////////////////
|
||||
std::unique_ptr<MetricsLibrary> metricsLibrary = {};
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Metrics Library client data.
|
||||
/////////////////////////////////////////////////////
|
||||
ClientData_1_0 clientData = {};
|
||||
ClientType_1_0 clientType = {ClientApi::OpenCL, ClientGen::Unknown};
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Metrics Library context.
|
||||
/////////////////////////////////////////////////////
|
||||
ContextCreateData_1_0 contextData = {};
|
||||
ContextHandle_1_0 context = {};
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Metrics Library oa/mmio counters configuration.
|
||||
/////////////////////////////////////////////////////
|
||||
ConfigurationHandle_1_0 oaConfiguration = {};
|
||||
ConfigurationHandle_1_0 userConfiguration = {};
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// Metrics Library query object.
|
||||
/////////////////////////////////////////////////////
|
||||
QueryHandle_1_0 query = {};
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -36,6 +36,7 @@ set(RUNTIME_SRCS_OS_INTERFACE_WINDOWS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_library.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_library.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_memory_win.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_metrics_library.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_socket.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_thread_win.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/os_thread_win.h
|
||||
|
||||
@@ -22,10 +22,6 @@ OSInterface::~OSInterface() {
|
||||
delete osInterfaceImpl;
|
||||
}
|
||||
|
||||
uint32_t OSInterface::getHwContextId() const {
|
||||
return osInterfaceImpl->getHwContextId();
|
||||
}
|
||||
|
||||
uint32_t OSInterface::getDeviceHandle() const {
|
||||
return static_cast<uint32_t>(osInterfaceImpl->getDeviceHandle());
|
||||
}
|
||||
|
||||
55
runtime/os_interface/windows/os_metrics_library.cpp
Normal file
55
runtime/os_interface/windows/os_metrics_library.cpp
Normal file
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "runtime/os_interface/metrics_library.h"
|
||||
|
||||
namespace NEO {
|
||||
//////////////////////////////////////////////////////
|
||||
// FUNCTION: MetricsLibrary::oaConfigurationActivate
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::oaConfigurationActivate(
|
||||
const ConfigurationHandle_1_0 &handle) {
|
||||
ConfigurationActivateData_1_0 data = {};
|
||||
data.Type = GpuConfigurationActivationType::EscapeCode;
|
||||
|
||||
return api->functions.ConfigurationActivate(
|
||||
handle,
|
||||
&data) == StatusCode::Success;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// FUNCTION: MetricsLibrary::oaConfigurationDeactivate
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::oaConfigurationDeactivate(
|
||||
const ConfigurationHandle_1_0 &handle) {
|
||||
return api->functions.ConfigurationDeactivate(
|
||||
handle) == StatusCode::Success;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// FUNCTION: MetricsLibrary::userConfigurationCreate
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::userConfigurationCreate(
|
||||
const ContextHandle_1_0 &context,
|
||||
ConfigurationHandle_1_0 &handle) {
|
||||
ConfigurationCreateData_1_0 data = {};
|
||||
data.HandleContext = context;
|
||||
data.Type = ObjectType::ConfigurationHwCountersUser;
|
||||
|
||||
return api->functions.ConfigurationCreate(
|
||||
&data,
|
||||
&handle) == StatusCode::Success;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// FUNCTION: MetricsLibrary::userConfigurationDelete
|
||||
//////////////////////////////////////////////////////
|
||||
bool MetricsLibrary::userConfigurationDelete(
|
||||
const ConfigurationHandle_1_0 &handle) {
|
||||
return api->functions.ConfigurationDelete(handle) == StatusCode::Success;
|
||||
}
|
||||
} // namespace NEO
|
||||
@@ -7,31 +7,82 @@
|
||||
|
||||
#include "performance_counters_win.h"
|
||||
|
||||
#include "runtime/device/device.h"
|
||||
#include "runtime/helpers/hw_helper.h"
|
||||
#include "runtime/os_interface/windows/os_interface.h"
|
||||
#include "runtime/os_interface/windows/windows_wrapper.h"
|
||||
#include "runtime/os_interface/windows/os_time_win.h"
|
||||
|
||||
namespace NEO {
|
||||
std::unique_ptr<PerformanceCounters> PerformanceCounters::create(OSTime *osTime) {
|
||||
return std::unique_ptr<PerformanceCounters>(new PerformanceCountersWin(osTime));
|
||||
}
|
||||
PerformanceCountersWin::PerformanceCountersWin(OSTime *osTime) : PerformanceCounters(osTime) {
|
||||
cbData.hAdapter = (void *)(UINT_PTR)osInterface->get()->getAdapterHandle();
|
||||
cbData.hDevice = (void *)(UINT_PTR)osInterface->get()->getDeviceHandle();
|
||||
cbData.pfnEscapeCb = osInterface->get()->getEscapeHandle();
|
||||
/////////////////////////////////////////////////////
|
||||
// PerformanceCounters::create
|
||||
/////////////////////////////////////////////////////
|
||||
// Creates the Windows performance counters object for the given device and
// fills in the Metrics Library client data from the device's OS interface
// (adapter, device and escape handles) and hardware info.
//
// device - device whose OS interface and hardware info are queried.
//
// Returns the newly created object. The Metrics Library itself is not opened
// here.
std::unique_ptr<PerformanceCounters> PerformanceCounters::create(Device *device) {
    auto counter = std::make_unique<PerformanceCountersWin>();
    auto osInterface = device->getOSTime()->getOSInterface()->get();
    auto gen = device->getHardwareInfo().platform.eRenderCoreFamily;
    auto &hwHelper = HwHelper::get(gen);
    UNRECOVERABLE_IF(counter == nullptr);

    // Windows-specific client data. Each field is assigned exactly once; the
    // previous code assigned the Device handle twice with the same value.
    counter->clientData.Windows.Adapter = reinterpret_cast<void *>(static_cast<UINT_PTR>(osInterface->getAdapterHandle()));
    counter->clientData.Windows.Device = reinterpret_cast<void *>(static_cast<UINT_PTR>(osInterface->getDeviceHandle()));
    counter->clientData.Windows.Escape = osInterface->getEscapeHandle();
    counter->clientData.Windows.KmdInstrumentationEnabled = device->getHardwareInfo().capabilityTable.instrumentationEnabled;

    // The context creation data points at the client data stored in the object.
    counter->contextData.ClientData = &counter->clientData;
    counter->clientType.Gen = static_cast<MetricsLibraryApi::ClientGen>(hwHelper.getMetricsLibraryGenId());

    return counter;
}
|
||||
|
||||
PerformanceCountersWin::~PerformanceCountersWin() {
|
||||
if (pAutoSamplingInterface) {
|
||||
autoSamplingStopFunc(&pAutoSamplingInterface);
|
||||
pAutoSamplingInterface = nullptr;
|
||||
available = false;
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCountersWin::enableCountersConfiguration
|
||||
//////////////////////////////////////////////////////
|
||||
bool PerformanceCountersWin::enableCountersConfiguration() {
|
||||
// Release previous counters configuration so the user
|
||||
// can change configuration between kernels.
|
||||
releaseCountersConfiguration();
|
||||
|
||||
// Create mmio user configuration.
|
||||
if (!metricsLibrary->userConfigurationCreate(
|
||||
context,
|
||||
userConfiguration)) {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create oa configuration.
|
||||
if (!metricsLibrary->oaConfigurationCreate(
|
||||
context,
|
||||
oaConfiguration)) {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Enable oa configuration.
|
||||
if (!metricsLibrary->oaConfigurationActivate(
|
||||
oaConfiguration)) {
|
||||
DEBUG_BREAK_IF(true);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// PerformanceCountersWin::releaseCountersConfiguration
|
||||
//////////////////////////////////////////////////////
|
||||
void PerformanceCountersWin::releaseCountersConfiguration() {
|
||||
// Mmio user configuration.
|
||||
if (userConfiguration.IsValid()) {
|
||||
metricsLibrary->userConfigurationDelete(userConfiguration);
|
||||
userConfiguration.data = nullptr;
|
||||
}
|
||||
|
||||
// Oa configuration.
|
||||
if (oaConfiguration.IsValid()) {
|
||||
metricsLibrary->oaConfigurationDeactivate(oaConfiguration);
|
||||
metricsLibrary->oaConfigurationDelete(oaConfiguration);
|
||||
oaConfiguration.data = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void PerformanceCountersWin::initialize(const HardwareInfo *hwInfo) {
|
||||
PerformanceCounters::initialize(hwInfo);
|
||||
setAvailableFunc(true);
|
||||
verifyEnableFunc(cbData);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -7,18 +7,18 @@
|
||||
|
||||
#pragma once
|
||||
#include "runtime/os_interface/performance_counters.h"
|
||||
#include "runtime/os_interface/windows/os_interface.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
class PerformanceCountersWin : virtual public PerformanceCounters {
|
||||
public:
|
||||
PerformanceCountersWin(OSTime *osTime);
|
||||
~PerformanceCountersWin() override;
|
||||
void initialize(const HardwareInfo *hwInfo) override;
|
||||
PerformanceCountersWin() = default;
|
||||
~PerformanceCountersWin() override = default;
|
||||
|
||||
protected:
|
||||
decltype(&instrSetAvailable) setAvailableFunc = instrSetAvailable;
|
||||
decltype(&instrEscVerifyEnable) verifyEnableFunc = instrEscVerifyEnable;
|
||||
/////////////////////////////////////////////////////
|
||||
// Gpu oa/mmio configuration.
|
||||
/////////////////////////////////////////////////////
|
||||
bool enableCountersConfiguration() override;
|
||||
void releaseCountersConfiguration() override;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -51,9 +51,11 @@ class TagAllocator {
|
||||
public:
|
||||
using NodeType = TagNode<TagType>;
|
||||
|
||||
TagAllocator(MemoryManager *memMngr, size_t tagCount, size_t tagAlignment) : memoryManager(memMngr),
|
||||
tagCount(tagCount),
|
||||
tagAlignment(tagAlignment) {
|
||||
TagAllocator(MemoryManager *memMngr, size_t tagCount, size_t tagAlignment, size_t tagSize = sizeof(TagType)) : memoryManager(memMngr),
|
||||
tagCount(tagCount),
|
||||
tagAlignment(tagAlignment) {
|
||||
|
||||
this->tagSize = alignUp(tagSize, tagAlignment);
|
||||
populateFreeTags();
|
||||
}
|
||||
|
||||
@@ -109,6 +111,7 @@ class TagAllocator {
|
||||
MemoryManager *memoryManager;
|
||||
size_t tagCount;
|
||||
size_t tagAlignment;
|
||||
size_t tagSize;
|
||||
|
||||
std::mutex allocatorMutex;
|
||||
|
||||
@@ -126,7 +129,6 @@ class TagAllocator {
|
||||
}
|
||||
|
||||
void populateFreeTags() {
|
||||
size_t tagSize = alignUp(sizeof(TagType), tagAlignment);
|
||||
size_t allocationSizeRequired = tagCount * tagSize;
|
||||
|
||||
auto allocationType = TagType::getAllocationType();
|
||||
|
||||
Reference in New Issue
Block a user