Metrics Library Performance Counters implementation.

Signed-off-by: Piotr Maciejewski <piotr.maciejewski@intel.com>
Change-Id: I0f00dca1892f4857baaebc75ba2208a4f33db1bf
This commit is contained in:
Piotr Maciejewski
2019-05-20 11:19:27 +02:00
committed by sys_ocldev
parent 369982995d
commit d1d794c658
67 changed files with 2154 additions and 2617 deletions

View File

@@ -88,10 +88,6 @@ endif()
target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC GMM_LIB_DLL DEFAULT_PLATFORM=${DEFAULT_SUPPORTED_PLATFORM})
if(INSTRUMENTATION_LIB_NAME)
add_dependencies(${NEO_STATIC_LIB_NAME} ${INSTRUMENTATION_LIB_NAME})
endif()
list(APPEND LIB_FLAGS_DEFINITIONS -DCIF_HEADERS_ONLY_BUILD ${SUPPORTED_GEN_FLAGS_DEFINITONS})
target_compile_definitions(${NEO_STATIC_LIB_NAME} PUBLIC ${LIB_FLAGS_DEFINITIONS})
@@ -149,15 +145,10 @@ if(${GENERATE_EXECUTABLE})
add_subdirectory(dll)
if(HAVE_INSTRUMENTATION)
target_link_libraries(${NEO_DYNAMIC_LIB_NAME} ${INSTRUMENTATION_LIB_NAME})
endif()
target_link_libraries(${NEO_DYNAMIC_LIB_NAME} ${NEO_STATIC_LIB_NAME} ${IGDRCL_EXTRA_LIBS})
target_include_directories(${NEO_DYNAMIC_LIB_NAME} BEFORE PRIVATE
${CMAKE_CURRENT_BINARY_DIR}
${INSTRUMENTATION_INCLUDE_PATH}
${AUB_STREAM_DIR}/..
)

View File

@@ -3374,25 +3374,8 @@ clSetPerformanceConfigurationINTEL(
cl_uint count,
cl_uint *offsets,
cl_uint *values) {
Device *pDevice = nullptr;
auto retVal = validateObjects(WithCastToInternal(device, &pDevice));
API_ENTER(&retVal);
DBG_LOG_INPUTS("device", device,
"count", count,
"offsets", offsets,
"values", values);
if (CL_SUCCESS != retVal) {
return retVal;
}
if (!pDevice->getHardwareInfo().capabilityTable.instrumentationEnabled) {
retVal = CL_PROFILING_INFO_NOT_AVAILABLE;
return retVal;
}
auto perfCounters = pDevice->getPerformanceCounters();
retVal = perfCounters->sendPerfConfiguration(count, offsets, values);
return retVal;
// Not supported; covered by the Metrics Library DLL.
return CL_INVALID_OPERATION;
}
void *clHostMemAllocINTEL(

View File

@@ -90,9 +90,6 @@ CommandQueue::~CommandQueue() {
}
delete commandStream;
if (perfConfigurationData) {
delete perfConfigurationData;
}
if (this->perfCountersEnabled) {
device->getPerformanceCounters()->shutdown();
}
@@ -275,44 +272,32 @@ bool CommandQueue::setPerfCountersEnabled(bool perfCountersEnabled, cl_uint conf
if (perfCountersEnabled == this->perfCountersEnabled) {
return true;
}
// Only dynamic configuration (set 0) is supported.
const uint32_t dynamicSet = 0;
if (configuration != dynamicSet) {
return false;
}
auto perfCounters = device->getPerformanceCounters();
if (perfCountersEnabled) {
perfCounters->enable();
if (!perfCounters->isAvailable()) {
perfCounters->shutdown();
return false;
}
perfConfigurationData = perfCounters->getPmRegsCfg(configuration);
if (perfConfigurationData == nullptr) {
perfCounters->shutdown();
return false;
}
InstrReadRegsCfg *pUserCounters = &perfConfigurationData->ReadRegs;
for (uint32_t i = 0; i < pUserCounters->RegsCount; ++i) {
perfCountersUserRegistersNumber++;
if (pUserCounters->Reg[i].BitSize > 32) {
perfCountersUserRegistersNumber++;
}
}
} else {
if (perfCounters->isAvailable()) {
perfCounters->shutdown();
}
perfCounters->shutdown();
}
this->perfCountersConfig = configuration;
this->perfCountersEnabled = perfCountersEnabled;
return true;
}
} // namespace NEO
// Returns the PerformanceCounters object owned by this queue's device
// (simply forwards to device->getPerformanceCounters()).
PerformanceCounters *CommandQueue::getPerfCounters() {
return device->getPerformanceCounters();
}
// Passes the queue's stored register configuration together with pointers to
// its configuration handle and pending flag to
// PerformanceCounters::sendPmRegsCfgCommands and returns that call's result.
bool CommandQueue::sendPerfCountersConfig() {
return getPerfCounters()->sendPmRegsCfgCommands(perfConfigurationData, &perfCountersRegsCfgHandle, &perfCountersRegsCfgPending);
}
cl_int CommandQueue::enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest) {
cl_int retVal = CL_SUCCESS;

View File

@@ -12,8 +12,6 @@
#include "runtime/helpers/engine_control.h"
#include "runtime/helpers/task_information.h"
#include "instrumentation.h"
#include <atomic>
#include <cstdint>
@@ -374,24 +372,14 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
return perfCountersEnabled;
}
// Returns the raw perfConfigurationData pointer held by the queue; no
// null check is performed here.
InstrPmRegsCfg *getPerfCountersConfigData() {
return perfConfigurationData;
}
PerformanceCounters *getPerfCounters();
bool sendPerfCountersConfig();
bool setPerfCountersEnabled(bool perfCountersEnabled, cl_uint configuration);
// Stores newValue in the queue's isSpecialCommandQueue flag.
void setIsSpecialCommandQueue(bool newValue) {
this->isSpecialCommandQueue = newValue;
}
// Returns the value of perfCountersUserRegistersNumber.
// NOTE(review): the return type is uint16_t; if the backing member is wider
// (it appears as uint32_t in this class), larger values would be truncated —
// confirm this is intended.
uint16_t getPerfCountersUserRegistersNumber() const {
return perfCountersUserRegistersNumber;
}
// Returns the priority value stored for this queue.
QueuePriority getPriority() const {
return priority;
}
@@ -462,11 +450,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
QueueThrottle throttle = QueueThrottle::MEDIUM;
bool perfCountersEnabled = false;
cl_uint perfCountersConfig = std::numeric_limits<uint32_t>::max();
uint32_t perfCountersUserRegistersNumber = 0;
InstrPmRegsCfg *perfConfigurationData = nullptr;
uint32_t perfCountersRegsCfgHandle = 0;
bool perfCountersRegsCfgPending = false;
LinearStream *commandStream = nullptr;

View File

@@ -403,11 +403,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
if (event && this->isProfilingEnabled()) {
// Get allocation for timestamps
hwTimeStamps = event->getHwTimeStampNode();
if (this->isPerfCountersEnabled()) {
hwPerfCounter = event->getHwPerfCounterNode();
// PERF COUNTER: copy current configuration from queue to event
event->copyPerfCounters(this->getPerfCountersConfigData());
}
}
if (parentKernel) {
@@ -421,6 +416,10 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
}
}
if (event && this->isPerfCountersEnabled()) {
hwPerfCounter = event->getHwPerfCounterNode();
}
HardwareInterface<GfxFamily>::dispatchWalker(
*this,
multiDispatchInfo,

View File

@@ -175,24 +175,6 @@ class GpgpuWalkerHelper {
uint32_t aluRegister,
uint32_t operation,
uint32_t mask);
// Appends one store-register command to commandStream, programmed with
// registerAddress as the register and memoryAddress as the memory target.
static void dispatchStoreRegisterCommand(
LinearStream *commandStream,
uint64_t memoryAddress,
uint32_t registerAddress);
// Appends store-register commands for the general purpose counters; results
// are written to consecutive slots starting at baseAddress.
static void dispatchPerfCountersGeneralPurposeCounterCommands(
LinearStream *commandStream,
uint64_t baseAddress);
// Appends store-register commands for the user read registers configured on
// commandQueue; results are written to slots starting at baseAddress.
static void dispatchPerfCountersUserCounterCommands(
CommandQueue &commandQueue,
LinearStream *commandStream,
uint64_t baseAddress);
// Appends store-register commands that capture the OA status, head pointer
// and tail pointer registers into the given hwPerfCounter tag.
static void dispatchPerfCountersOABufferStateCommands(
TagNode<HwPerfCounter> &hwPerfCounter,
LinearStream *commandStream);
};
template <typename GfxFamily>

View File

@@ -24,8 +24,6 @@
#include "runtime/memory_manager/graphics_allocation.h"
#include "runtime/utilities/tag_allocator.h"
#include "instrumentation.h"
#include <algorithm>
#include <cmath>
@@ -149,102 +147,17 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
pMICmdLow->setMemoryAddress(timeStampAddress);
}
// Appends a single MI_STORE_REGISTER_MEM command to the stream.
// commandStream   - stream that receives the command
// memoryAddress   - address programmed as the command's memory address
// registerAddress - address programmed as the command's register address
template <typename GfxFamily>
void GpgpuWalkerHelper<GfxFamily>::dispatchStoreRegisterCommand(
    LinearStream *commandStream,
    uint64_t memoryAddress,
    uint32_t registerAddress) {
    using StoreRegisterMem = typename GfxFamily::MI_STORE_REGISTER_MEM;

    // Reserve room in the stream first, then start from the family's
    // pre-initialized command before programming the two addresses.
    auto &storeCmd = *commandStream->getSpaceForCmd<StoreRegisterMem>();
    storeCmd = GfxFamily::cmdInitStoreRegisterMem;
    storeCmd.setRegisterAddress(registerAddress);
    storeCmd.setMemoryAddress(memoryAddress);
}
// Emits one store-register command per general purpose counter dword.
// Source registers start at INSTR_PERF_CNT_1_DW0 and destinations start at
// baseAddress; both advance by sizeof(cl_uint) for each counter.
template <typename GfxFamily>
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersGeneralPurposeCounterCommands(
    LinearStream *commandStream,
    uint64_t baseAddress) {
    constexpr auto slotSize = sizeof(cl_uint);

    for (auto counter = 0u; counter < NEO::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT; ++counter) {
        // The Gp field is 2 * uint64 wide, so it can hold 4 uint32 values.
        const auto byteOffset = counter * slotSize;
        const uint32_t sourceRegister = INSTR_GFX_OFFSETS::INSTR_PERF_CNT_1_DW0 + byteOffset;
        const uint64_t destination = baseAddress + byteOffset;
        dispatchStoreRegisterCommand(commandStream, destination, sourceRegister);
    }
}
// Emits store-register commands for every user read register configured on
// the queue. Each register gets a cl_ulong-wide slot starting at baseAddress;
// registers wider than 32 bits get a second command for the upper dword.
template <typename GfxFamily>
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersUserCounterCommands(
    CommandQueue &commandQueue,
    LinearStream *commandStream,
    uint64_t baseAddress) {
    const auto &readRegs = commandQueue.getPerfCountersConfigData()->ReadRegs;

    for (uint32_t index = 0; index < readRegs.RegsCount; ++index) {
        const auto &reg = readRegs.Reg[index];
        const uint32_t lowRegister = reg.Offset;
        // Offset between base (low) registers is cl_ulong wide.
        const uint64_t lowAddress = baseAddress + index * sizeof(cl_ulong);

        dispatchStoreRegisterCommand(commandStream, lowAddress, lowRegister);
        if (reg.BitSize <= 32) {
            continue;
        }
        // Upper dword lives right after the lower one, both in MMIO space and in memory.
        dispatchStoreRegisterCommand(commandStream, lowAddress + sizeof(cl_uint), lowRegister + sizeof(cl_uint));
    }
}
// Captures the OA buffer state (status, head pointer, tail pointer) into the
// matching HWPerfCounters fields of the given tag, in that order.
template <typename GfxFamily>
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersOABufferStateCommands(
    TagNode<HwPerfCounter> &hwPerfCounter,
    LinearStream *commandStream) {
    // Stores one register into the tag at the given byte offset.
    auto storeIntoTag = [&](auto fieldOffset, auto sourceRegister) {
        dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + fieldOffset, sourceRegister);
    };

    storeIntoTag(offsetof(HwPerfCounter, HWPerfCounters.OaStatus), INSTR_GFX_OFFSETS::INSTR_OA_STATUS);
    storeIntoTag(offsetof(HwPerfCounter, HWPerfCounters.OaHead), INSTR_GFX_OFFSETS::INSTR_OA_HEAD_PTR);
    storeIntoTag(offsetof(HwPerfCounter, HWPerfCounters.OaTail), INSTR_GFX_OFFSETS::INSTR_OA_TAIL_PTR);
}
// Writes the "start" set of performance-counter commands for hwPerfCounter
// into commandStream.
// NOTE(review): in this view two variants appear side by side — one that asks
// the PerformanceCounters object for a command size/buffer
// (getGpuCommandsSize / getGpuCommands) and one that hand-assembles
// PIPE_CONTROL, store-register and MI_REPORT_PERF_COUNT commands. Confirm
// which lines are live before relying on the sequence below.
template <typename GfxFamily>
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersCommandsStart(
CommandQueue &commandQueue,
TagNode<HwPerfCounter> &hwPerfCounter,
LinearStream *commandStream) {
using MI_REPORT_PERF_COUNT = typename GfxFamily::MI_REPORT_PERF_COUNT;
auto pPerformanceCounters = commandQueue.getPerfCounters();
const uint32_t size = pPerformanceCounters->getGpuCommandsSize(true);
void *pBuffer = commandStream->getSpace(size);
auto perfCounters = commandQueue.getPerfCounters();
uint32_t currentReportId = perfCounters->getCurrentReportId();
uint64_t address = 0;
//flush command streamer
auto pPipeControlCmd = commandStream->getSpaceForCmd<PIPE_CONTROL>();
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
pPipeControlCmd->setCommandStreamerStallEnable(true);
//Store value of NOOPID register
GpgpuWalkerHelper<GfxFamily>::dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.DMAFenceIdBegin), INSTR_MMIO_NOOPID);
//Read Core Frequency
GpgpuWalkerHelper<GfxFamily>::dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.CoreFreqBegin), INSTR_MMIO_RPSTAT1);
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersGeneralPurposeCounterCommands(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportBegin.Gp));
auto pReportPerfCount = commandStream->getSpaceForCmd<MI_REPORT_PERF_COUNT>();
*pReportPerfCount = GfxFamily::cmdInitReportPerfCount;
pReportPerfCount->setReportId(currentReportId);
address = hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportBegin.Oa);
pReportPerfCount->setMemoryAddress(address);
address = hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWTimeStamp.GlobalStartTS);
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, 0llu, false);
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersUserCounterCommands(commandQueue, commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportBegin.User));
commandQueue.sendPerfCountersConfig();
pPerformanceCounters->getGpuCommands(hwPerfCounter, true, size, pBuffer);
}
template <typename GfxFamily>
@@ -253,40 +166,11 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersCommandsEnd(
TagNode<HwPerfCounter> &hwPerfCounter,
LinearStream *commandStream) {
using MI_REPORT_PERF_COUNT = typename GfxFamily::MI_REPORT_PERF_COUNT;
auto pPerformanceCounters = commandQueue.getPerfCounters();
const uint32_t size = pPerformanceCounters->getGpuCommandsSize(false);
void *pBuffer = commandStream->getSpace(size);
auto perfCounters = commandQueue.getPerfCounters();
uint32_t currentReportId = perfCounters->getCurrentReportId();
//flush command streamer
auto pPipeControlCmd = commandStream->getSpaceForCmd<PIPE_CONTROL>();
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
pPipeControlCmd->setCommandStreamerStallEnable(true);
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersOABufferStateCommands(hwPerfCounter, commandStream);
//Timestamp: Global End
uint64_t address = hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWTimeStamp.GlobalEndTS);
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, 0llu, false);
auto pReportPerfCount = commandStream->getSpaceForCmd<MI_REPORT_PERF_COUNT>();
*pReportPerfCount = GfxFamily::cmdInitReportPerfCount;
pReportPerfCount->setReportId(currentReportId);
address = hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.Oa);
pReportPerfCount->setMemoryAddress(address);
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersGeneralPurposeCounterCommands(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.Gp));
//Store value of NOOPID register
GpgpuWalkerHelper<GfxFamily>::dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.DMAFenceIdEnd), INSTR_MMIO_NOOPID);
//Read Core Frequency
GpgpuWalkerHelper<GfxFamily>::dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.CoreFreqEnd), INSTR_MMIO_RPSTAT1);
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersUserCounterCommands(commandQueue, commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.User));
perfCounters->setCpuTimestamp();
pPerformanceCounters->getGpuCommands(hwPerfCounter, false, size, pBuffer);
}
template <typename GfxFamily>

View File

@@ -189,31 +189,8 @@ size_t EnqueueOperation<GfxFamily>::getSizeRequiredCSKernel(bool reserveProfilin
size += 2 * sizeof(PIPE_CONTROL) + 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
}
if (reservePerfCounters) {
//start cmds
//P_C: flush CS & TimeStamp BEGIN
size += 2 * sizeof(PIPE_CONTROL);
//SRM NOOPID & Frequency
size += 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
//gp registers
size += NEO::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
//report perf count
size += sizeof(typename GfxFamily::MI_REPORT_PERF_COUNT);
//user registers
size += commandQueue.getPerfCountersUserRegistersNumber() * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
//end cmds
//P_C: flush CS & TimeStamp END;
size += 2 * sizeof(PIPE_CONTROL);
//OA buffer (status head, tail)
size += 3 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
//report perf count
size += sizeof(typename GfxFamily::MI_REPORT_PERF_COUNT);
//gp registers
size += NEO::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
//SRM NOOPID & Frequency
size += 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
//user registers
size += commandQueue.getPerfCountersUserRegistersNumber() * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
size += commandQueue.getPerfCounters()->getGpuCommandsSize(true);
size += commandQueue.getPerfCounters()->getGpuCommandsSize(false);
}
size += GpgpuWalkerHelper<GfxFamily>::getSizeForWADisableLSQCROPERFforOCL(pKernel);

View File

@@ -414,9 +414,9 @@ TagAllocator<HwTimeStamps> *CommandStreamReceiver::getEventTsAllocator() {
return profilingTimeStampAllocator.get();
}
// Returns the allocator for HwPerfCounter tags, creating it on first use.
// In the variant taking tagSize, the value is only forwarded to the
// TagAllocator constructor; once perfCounterAllocator exists the argument is
// not consulted again.
TagAllocator<HwPerfCounter> *CommandStreamReceiver::getEventPerfCountAllocator() {
TagAllocator<HwPerfCounter> *CommandStreamReceiver::getEventPerfCountAllocator(const uint32_t tagSize) {
if (perfCounterAllocator.get() == nullptr) {
perfCounterAllocator = std::make_unique<TagAllocator<HwPerfCounter>>(getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize);
perfCounterAllocator = std::make_unique<TagAllocator<HwPerfCounter>>(getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, tagSize);
}
return perfCounterAllocator.get();
}

View File

@@ -162,7 +162,7 @@ class CommandStreamReceiver {
OsContext &getOsContext() const { return *osContext; }
TagAllocator<HwTimeStamps> *getEventTsAllocator();
TagAllocator<HwPerfCounter> *getEventPerfCountAllocator();
TagAllocator<HwPerfCounter> *getEventPerfCountAllocator(const uint32_t tagSize);
TagAllocator<TimestampPacketStorage> *getTimestampPacketAllocator();
virtual cl_int expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation);

View File

@@ -122,8 +122,7 @@ bool Device::createDeviceImpl() {
auto &hwInfo = getHardwareInfo();
if (osTime->getOSInterface()) {
if (hwInfo.capabilityTable.instrumentationEnabled) {
performanceCounters = createPerformanceCountersFunc(osTime.get());
performanceCounters->initialize(&hwInfo);
performanceCounters = createPerformanceCountersFunc(this);
}
}

View File

@@ -20,4 +20,11 @@ const char *gmmEntryName = GMM_ENTRY_NAME;
const char *sysFsPciPath = "/sys/bus/pci/devices/";
const char *tbxLibName = "libtbxAccess.so";
// Os specific Metrics Library name.
// Two names exist: a 64-bit and a 32-bit one. The 64-bit name is selected for
// x86_64 and ppc64 as before, and additionally for any LP64 target
// (__LP64__ is predefined by GCC/Clang, e.g. on aarch64) so that other 64-bit
// builds no longer fall through to the 32-bit name. defined() is used so the
// check does not evaluate undefined macros.
#if defined(__LP64__) || defined(__x86_64__) || defined(__ppc64__)
const char *metricsLibraryDllName = "libigdml64.so";
#else
const char *metricsLibraryDllName = "libigdml32.so";
#endif
} // namespace Os

View File

@@ -15,4 +15,11 @@ const char *igcDllName = IGC_LIBRARY_NAME;
const char *gdiDllName = "gdi32.dll";
const char *gmmDllName = GMM_UMD_DLL;
const char *gmmEntryName = GMM_ENTRY_NAME;
// Metrics Library file name for Windows builds: the 32-bit name unless the
// compiler reports a 64-bit Windows target via _WIN64.
#ifndef _WIN64
const char *metricsLibraryDllName = "igdml32.dll";
#else
const char *metricsLibraryDllName = "igdml64.dll";
#endif
} // namespace Os

View File

@@ -26,6 +26,8 @@
#include "runtime/utilities/stackvec.h"
#include "runtime/utilities/tag_allocator.h"
#define OCLRT_NUM_TIMESTAMP_BITS (32)
namespace NEO {
const cl_uint Event::eventNotReady = 0xFFFFFFF0;
@@ -136,9 +138,6 @@ Event::~Event() {
if (ctx != nullptr) {
ctx->decRefInternal();
}
if (perfConfigurationData) {
delete perfConfigurationData;
}
// in case event did not unblock child events before
unblockEventsBlockedByThis(executionStatus);
@@ -201,12 +200,10 @@ cl_int Event::getEventProfilingInfo(cl_profiling_info paramName,
if (!perfCountersEnabled) {
return CL_INVALID_VALUE;
}
if (!cmdQueue->getPerfCounters()->processEventReport(paramValueSize,
paramValue,
paramValueSizeRet,
getHwPerfCounterNode()->tagForCpuAccess,
perfConfigurationData,
updateStatusAndCheckCompletion())) {
if (!cmdQueue->getPerfCounters()->getApiReport(paramValueSize,
paramValue,
paramValueSizeRet,
updateStatusAndCheckCompletion())) {
return CL_PROFILING_INFO_NOT_AVAILABLE;
}
return CL_SUCCESS;
@@ -701,17 +698,14 @@ TagNode<HwTimeStamps> *Event::getHwTimeStampNode() {
}
// Returns the event's HwPerfCounter tag node, requesting one from the command
// stream receiver's perf-count allocator when none is cached yet.
// In the variant that checks cmdQueue->getPerfCounters(), no tag is requested
// when the queue has no performance counters object, and the allocator is
// given the GPU report size obtained from that object.
TagNode<HwPerfCounter> *Event::getHwPerfCounterNode() {
if (!perfCounterNode) {
perfCounterNode = cmdQueue->getCommandStreamReceiver().getEventPerfCountAllocator()->getTag();
if (!perfCounterNode && cmdQueue->getPerfCounters()) {
const uint32_t gpuReportSize = cmdQueue->getPerfCounters()->getGpuReportSize();
perfCounterNode = cmdQueue->getCommandStreamReceiver().getEventPerfCountAllocator(gpuReportSize)->getTag();
}
return perfCounterNode;
}
// Allocates a fresh InstrPmRegsCfg and copies sizeof(InstrPmRegsCfg) bytes
// from config into it, storing the result in perfConfigurationData.
// NOTE(review): a pointer already held in perfConfigurationData is overwritten
// without being freed, and config is read without a null check — confirm
// callers invoke this at most once per event with a valid config.
void Event::copyPerfCounters(InstrPmRegsCfg *config) {
perfConfigurationData = new InstrPmRegsCfg;
memcpy_s(perfConfigurationData, sizeof(InstrPmRegsCfg), config, sizeof(InstrPmRegsCfg));
}
// Passes inputTimestampPacketContainer to this event's own container through
// assignAndIncrementNodesRefCounts.
void Event::addTimestampPacketNodes(const TimestampPacketContainer &inputTimestampPacketContainer) {
timestampPacketContainer->assignAndIncrementNodesRefCounts(inputTimestampPacketContainer);
}

View File

@@ -21,8 +21,6 @@
#include <cstdint>
#include <vector>
#define OCLRT_NUM_TIMESTAMP_BITS (32)
namespace NEO {
template <typename TagType>
struct TagNode;
@@ -121,8 +119,6 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
this->perfCountersEnabled = perfCountersEnabled;
}
void copyPerfCounters(InstrPmRegsCfg *config);
TagNode<HwPerfCounter> *getHwPerfCounterNode();
std::unique_ptr<FlushStampTracker> flushStamp;
@@ -375,7 +371,6 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
TagNode<HwTimeStamps> *timeStampNode = nullptr;
TagNode<HwPerfCounter> *perfCounterNode = nullptr;
std::unique_ptr<TimestampPacketContainer> timestampPacketContainer;
InstrPmRegsCfg *perfConfigurationData = nullptr;
//number of events this event depends on
std::atomic<int> parentCount;
//event parents

View File

@@ -10,20 +10,22 @@
#include "runtime/event/hw_timestamps.h"
#include "runtime/memory_manager/graphics_allocation.h"
#include "instrumentation.h"
namespace NEO {
// Tag payload type used with TagAllocator/TagNode for performance-counter data.
struct HwPerfCounter {
// Resets the tag's contents to their initial state.
void initialize() {
HWPerfCounters = {};
HWTimeStamp.initialize();
report[0] = 0;
}
// Tags of this type are backed by PROFILING_TAG_BUFFER allocations.
static GraphicsAllocation::AllocationType getAllocationType() {
return GraphicsAllocation::AllocationType::PROFILING_TAG_BUFFER;
}
// Always reports the tag as releasable.
bool canBeReleased() const { return true; }
HwPerfCounters HWPerfCounters;
HwTimeStamps HWTimeStamp;
// The GPU report size is not known at compile time.
// That information is provided by the metrics library DLL.
// The variable below is allocated dynamically based on information
// from the metrics library. Take a look at CommandStreamReceiver::getEventPerfCountAllocator.
uint8_t report[1] = {};
};
} // namespace NEO

View File

@@ -17,6 +17,11 @@ uint32_t HwHelperHw<Family>::getComputeUnitsUsedForScratch(const HardwareInfo *p
return pHwInfo->gtSystemInfo.MaxSubSlicesSupported * pHwInfo->gtSystemInfo.MaxEuPerSubSlice * 8;
}
// Reports the Metrics Library client generation for this family (Gen11) as a
// plain uint32_t.
template <>
uint32_t HwHelperHw<Family>::getMetricsLibraryGenId() const {
    constexpr auto clientGen = MetricsLibraryApi::ClientGen::Gen11;
    return static_cast<uint32_t>(clientGen);
}
template class AubHelperHw<Family>;
template class HwHelperHw<Family>;
template class FlatBatchBufferHelperHw<Family>;

View File

@@ -33,6 +33,11 @@ void PipeControlHelper<Family>::addPipeControlWA(LinearStream &commandStream) {
pCmd->setCommandStreamerStallEnable(true);
}
// Reports the Metrics Library client generation for this family (Gen9) as a
// plain uint32_t.
template <>
uint32_t HwHelperHw<Family>::getMetricsLibraryGenId() const {
    constexpr auto clientGen = MetricsLibraryApi::ClientGen::Gen9;
    return static_cast<uint32_t>(clientGen);
}
template class AubHelperHw<Family>;
template class HwHelperHw<Family>;
template class FlatBatchBufferHelperHw<Family>;

View File

@@ -64,6 +64,7 @@ class HwHelper {
virtual bool getEnableLocalMemory(const HardwareInfo &hwInfo) const = 0;
virtual std::string getExtensions() const = 0;
static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo);
virtual uint32_t getMetricsLibraryGenId() const = 0;
static constexpr uint32_t lowPriorityGpgpuEngineIndex = 1;
@@ -155,6 +156,8 @@ class HwHelperHw : public HwHelper {
std::string getExtensions() const override;
uint32_t getMetricsLibraryGenId() const override;
protected:
HwHelperHw() = default;
};

View File

@@ -204,4 +204,9 @@ int PipeControlHelper<GfxFamily>::getRequiredPipeControlSize() {
return pipeControlCount * sizeof(typename GfxFamily::PIPE_CONTROL);
}
// Generic implementation: reports Gen9 as the Metrics Library client
// generation, converted to uint32_t.
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getMetricsLibraryGenId() const {
    constexpr auto defaultClientGen = MetricsLibraryApi::ClientGen::Gen9;
    return static_cast<uint32_t>(defaultClientGen);
}
} // namespace NEO

View File

@@ -9,86 +9,4 @@
// Stub instrumentation entry points: haveInstrumentation is false and every
// function below either reports failure (false / 0) or does nothing.
namespace NEO {
const bool haveInstrumentation = false;
// Stub: never starts auto sampling; always returns false.
bool instrAutoSamplingStart(
InstrEscCbData cbData,
void **ppOAInterface) {
return false;
}
// Stub: always returns false.
bool instrAutoSamplingStop(
void **ppOAInterface) {
return false;
}
// Stub: performs no check; always returns false.
bool instrCheckPmRegsCfg(
InstrPmRegsCfg *pQueryPmRegsCfg,
uint32_t *pLastPmRegsCfgHandle,
const void *pASInterface) {
return false;
}
// Stub: leaves all arguments untouched.
void instrGetPerfCountersQueryData(
InstrEscCbData cbData,
GTDI_QUERY *pData,
HwPerfCounters *pLayout,
uint64_t cpuRawTimestamp,
void *pASInterface,
InstrPmRegsCfg *pPmRegsCfg,
bool useMiRPC,
bool resetASData,
const InstrAllowedContexts *pAllowedContexts) {
}
// Stub: does not fill pCfg or pAutoSampling; always returns false.
bool instrEscGetPmRegsCfg(
InstrEscCbData cbData,
uint32_t cfgId,
InstrPmRegsCfg *pCfg,
InstrAutoSamplingMode *pAutoSampling) {
return false;
}
// Stub: always returns false regardless of enable.
bool instrEscHwMetricsEnable(
InstrEscCbData cbData,
bool enable) {
return false;
}
// Stub: always returns false.
bool instrEscLoadPmRegsCfg(
InstrEscCbData cbData,
InstrPmRegsCfg *pCfg,
bool hardwareAccess) {
return false;
}
// Stub: ignores the offsets/values arrays; always returns false.
bool instrEscSetPmRegsCfg(
InstrEscCbData cbData,
uint32_t count,
uint32_t *pOffsets,
uint32_t *pValues) {
return false;
}
// Stub: ignores the offsets/bit-size arrays; always returns false.
bool instrEscSendReadRegsCfg(
InstrEscCbData cbData,
uint32_t count,
uint32_t *pOffsets,
uint32_t *pBitSizes) {
return false;
}
// Stub: always returns false regardless of enabled.
bool instrSetAvailable(bool enabled) {
return false;
}
// Stub: does nothing.
void instrEscVerifyEnable(
InstrEscCbData cbData) {
}
// Stub: always returns 0.
uint32_t instrSetPlatformInfo(
uint32_t productId,
void *featureTable) {
return 0;
}
} // namespace NEO

View File

@@ -10,201 +10,162 @@
#include <cstdint>
namespace NEO {
constexpr unsigned int INSTR_GENERAL_PURPOSE_COUNTERS_COUNT = 4;
constexpr unsigned int INSTR_MAX_USER_COUNTERS_COUNT = 32;
constexpr unsigned int INSTR_MMIO_NOOPID = 0x2094;
constexpr unsigned int INSTR_MMIO_RPSTAT1 = 0xA01C;
constexpr unsigned int INSTR_GTDI_MAX_READ_REGS = 16;
constexpr unsigned int INSTR_GTDI_PERF_METRICS_OA_COUNT = 36;
constexpr unsigned int INSTR_GTDI_PERF_METRICS_OA_40b_COUNT = 32;
constexpr unsigned int INSTR_GTDI_PERF_METRICS_NOA_COUNT = 16;
constexpr unsigned int INSTR_MAX_CONTEXT_TAGS = 128;
constexpr unsigned int INSTR_MAX_OA_PROLOG = 2;
constexpr unsigned int INSTR_MAX_OA_EPILOG = 2;
constexpr unsigned int INSTR_MAX_PM_REGS_BASE = 256;
constexpr unsigned int INSTR_MAX_PM_REGS = (INSTR_MAX_PM_REGS_BASE + INSTR_MAX_OA_PROLOG + INSTR_MAX_OA_EPILOG);
constexpr unsigned int INSTR_PM_REGS_CFG_INVALID = 0;
constexpr unsigned int INSTR_READ_REGS_CFG_TAG = 0xFFFFFFFE;
constexpr unsigned int INSTR_MAX_READ_REGS = 16;
extern const bool haveInstrumentation;
} // namespace NEO
typedef enum {
INSTR_AS_MODE_OFF,
INSTR_AS_MODE_EVENT,
INSTR_AS_MODE_TIMER,
INSTR_AS_MODE_DMA
} InstrAutoSamplingMode;
namespace MetricsLibraryApi {
// Dummy macros.
#define ML_STDCALL
#define METRICS_LIBRARY_CONTEXT_CREATE_1_0 "create"
#define METRICS_LIBRARY_CONTEXT_DELETE_1_0 "delete"
typedef enum GTDI_CONFIGURATION_SET {
GTDI_CONFIGURATION_SET_DYNAMIC = 0,
GTDI_CONFIGURATION_SET_1,
GTDI_CONFIGURATION_SET_2,
GTDI_CONFIGURATION_SET_3,
GTDI_CONFIGURATION_SET_4,
GTDI_CONFIGURATION_SET_COUNT,
GTDI_CONFIGURATION_SET_MAX = 0xFFFFFFFF
} GTDI_CONFIGURATION_SET;
// Dummy enumerators.
// Each is a scoped enum with uint32_t as its underlying type; enumerator
// values follow declaration order starting at 0.
// NOTE(review): presumably stand-ins for the real Metrics Library header —
// confirm the names and ordering match it where values cross the DLL boundary.
enum class ClientApi : uint32_t { OpenCL };
enum class ClientGen : uint32_t { Unknown,
Gen9,
Gen11 };
enum class ValueType : uint32_t { Uint32 };
enum class GpuConfigurationActivationType : uint32_t { Tbs,
EscapeCode };
enum class ObjectType : uint32_t { QueryHwCounters,
ConfigurationHwCountersUser,
ConfigurationHwCountersOa };
enum class ParameterType : uint32_t { QueryHwCountersReportApiSize,
QueryHwCountersReportGpuSize };
enum class StatusCode : uint32_t { Failed,
IncorrectObject,
Success };
enum class GpuCommandBufferType : uint32_t { Render };
enum INSTR_GFX_OFFSETS {
INSTR_PERF_CNT_1_DW0 = 0x91B8,
INSTR_PERF_CNT_1_DW1 = 0x91BC,
INSTR_PERF_CNT_2_DW0 = 0x91C0,
INSTR_PERF_CNT_2_DW1 = 0x91C4,
INSTR_OA_STATUS = 0x2B08,
INSTR_OA_HEAD_PTR = 0x2B0C,
INSTR_OA_TAIL_PTR = 0x2B10
// Dummy handles.
struct Handle {
void *data;
bool IsValid() const { return data != nullptr; } // NOLINT
};
struct QueryHandle_1_0 : Handle {};
struct ConfigurationHandle_1_0 : Handle {};
struct ContextHandle_1_0 : Handle {};
// Dummy structures.
struct ClientCallbacks_1_0 {};
struct ClientDataWindows_1_0 {
void *Device;
void *Adapter;
void *Escape;
bool KmdInstrumentationEnabled;
};
typedef struct {
struct ClientDataLinux_1_0 {
void *Reserved;
};
} GTDI_QUERY;
struct ClientData_1_0 {
union {
ClientDataWindows_1_0 Windows;
ClientDataLinux_1_0 Linux;
};
};
typedef struct {
uint32_t contextId[INSTR_MAX_CONTEXT_TAGS];
uint32_t count;
} InstrAllowedContexts;
struct ConfigurationActivateData_1_0 {
GpuConfigurationActivationType Type;
};
typedef struct {
uint64_t counter[INSTR_GTDI_MAX_READ_REGS];
uint32_t userCntrCfgId;
} InstrReportDataUser;
struct ClientType_1_0 {
ClientApi Api;
ClientGen Gen;
};
typedef struct {
uint32_t reportId;
uint32_t timestamp;
uint32_t contextId;
uint32_t gpuTicksCounter;
} InstrReportDataOaHeader;
struct TypedValue_1_0 {
uint32_t ValueUInt32;
};
typedef struct {
uint32_t oaCounter[INSTR_GTDI_PERF_METRICS_OA_COUNT];
uint8_t oaCounterHB[INSTR_GTDI_PERF_METRICS_OA_40b_COUNT];
uint32_t noaCounter[INSTR_GTDI_PERF_METRICS_NOA_COUNT];
} InstrReportDataOaData;
struct GpuMemory_1_0 {
uint64_t GpuAddress;
void *CpuAddress;
};
typedef struct {
InstrReportDataOaHeader header;
InstrReportDataOaData data;
} InstrReportDataOa;
struct CommandBufferQueryHwCounters_1_0 {
QueryHandle_1_0 Handle;
ConfigurationHandle_1_0 HandleUserConfiguration;
bool Begin;
};
typedef struct {
uint64_t counter1;
uint64_t counter2;
} InstrReportDataMonitor;
struct CommandBufferSize_1_0 {
uint32_t GpuMemorySize;
};
typedef struct {
InstrReportDataMonitor Gp;
InstrReportDataUser User;
InstrReportDataOa Oa;
} InstrReportData;
struct ConfigurationCreateData_1_0 {
ContextHandle_1_0 HandleContext;
ObjectType Type;
};
typedef struct {
uint32_t DMAFenceIdBegin;
uint32_t DMAFenceIdEnd;
uint32_t CoreFreqBegin;
uint32_t CoreFreqEnd;
InstrReportData HwPerfReportBegin;
InstrReportData HwPerfReportEnd;
uint32_t OaStatus;
uint32_t OaHead;
uint32_t OaTail;
} HwPerfCounters;
struct CommandBufferData_1_0 {
ContextHandle_1_0 HandleContext;
ObjectType CommandsType;
GpuCommandBufferType Type;
GpuMemory_1_0 Allocation;
void *Data;
uint32_t Size;
CommandBufferQueryHwCounters_1_0 QueryHwCounters;
};
typedef struct {
uint32_t Offset;
uint32_t BitSize;
} InstrPmReg;
struct QueryCreateData_1_0 {
ContextHandle_1_0 HandleContext;
ObjectType Type;
uint32_t Slots;
};
typedef struct {
uint32_t Handle;
uint32_t RegsCount;
} InstrPmRegsOaCountersCfg;
struct GetReportQuery_1_0 {
QueryHandle_1_0 Handle;
typedef struct {
uint32_t Handle;
uint32_t RegsCount;
} InstrPmRegsGpCountersCfg;
uint32_t Slot;
uint32_t SlotsCount;
typedef struct {
InstrPmReg Reg[INSTR_MAX_READ_REGS];
uint32_t RegsCount;
} InstrReadRegsCfg;
uint32_t DataSize;
void *Data;
};
typedef struct {
InstrPmRegsOaCountersCfg OaCounters;
InstrPmRegsGpCountersCfg GpCounters;
InstrReadRegsCfg ReadRegs;
} InstrPmRegsCfg;
struct GetReportData_1_0 {
ObjectType Type;
GetReportQuery_1_0 Query;
};
typedef struct {
void *hAdapter;
void *hDevice;
void *pfnEscapeCb;
bool DDI;
} InstrEscCbData;
struct ContextCreateData_1_0 {
ClientData_1_0 *ClientData;
ClientCallbacks_1_0 *ClientCallbacks;
struct Interface_1_0 *Api;
};
bool instrAutoSamplingStart(
InstrEscCbData cbData,
void **ppOAInterface);
// Dummy functions.
using ContextCreateFunction_1_0 = StatusCode(ML_STDCALL *)(ClientType_1_0 clientType, struct ContextCreateData_1_0 *createData, ContextHandle_1_0 *handle);
using ContextDeleteFunction_1_0 = StatusCode(ML_STDCALL *)(const ContextHandle_1_0 handle);
using GetParameterFunction_1_0 = StatusCode(ML_STDCALL *)(const ParameterType parameter, ValueType *type, TypedValue_1_0 *value);
using CommandBufferGetFunction_1_0 = StatusCode(ML_STDCALL *)(const CommandBufferData_1_0 *data);
using CommandBufferGetSizeFunction_1_0 = StatusCode(ML_STDCALL *)(const CommandBufferData_1_0 *data, CommandBufferSize_1_0 *size);
using QueryCreateFunction_1_0 = StatusCode(ML_STDCALL *)(const QueryCreateData_1_0 *createData, QueryHandle_1_0 *handle);
using QueryDeleteFunction_1_0 = StatusCode(ML_STDCALL *)(const QueryHandle_1_0 handle);
using ConfigurationCreateFunction_1_0 = StatusCode(ML_STDCALL *)(const ConfigurationCreateData_1_0 *createData, ConfigurationHandle_1_0 *handle);
using ConfigurationActivateFunction_1_0 = StatusCode(ML_STDCALL *)(const ConfigurationHandle_1_0 handle, const ConfigurationActivateData_1_0 *activateData);
using ConfigurationDeactivateFunction_1_0 = StatusCode(ML_STDCALL *)(const ConfigurationHandle_1_0 handle);
using ConfigurationDeleteFunction_1_0 = StatusCode(ML_STDCALL *)(const ConfigurationHandle_1_0 handle);
using GetDataFunction_1_0 = StatusCode(ML_STDCALL *)(GetReportData_1_0 *data);
bool instrAutoSamplingStop(
void **ppOAInterface);
// Dummy interface.
struct Interface_1_0 {
GetParameterFunction_1_0 GetParameter;
bool instrCheckPmRegsCfg(
InstrPmRegsCfg *pQueryPmRegsCfg,
uint32_t *pLastPmRegsCfgHandle,
const void *pASInterface);
CommandBufferGetFunction_1_0 CommandBufferGet;
CommandBufferGetSizeFunction_1_0 CommandBufferGetSize;
void instrGetPerfCountersQueryData(
InstrEscCbData cbData,
GTDI_QUERY *pData,
HwPerfCounters *pLayout,
uint64_t cpuRawTimestamp,
void *pASInterface,
InstrPmRegsCfg *pPmRegsCfg,
bool useMiRPC,
bool resetASData = false,
const InstrAllowedContexts *pAllowedContexts = nullptr);
QueryCreateFunction_1_0 QueryCreate;
QueryDeleteFunction_1_0 QueryDelete;
bool instrEscGetPmRegsCfg(
InstrEscCbData cbData,
uint32_t cfgId,
InstrPmRegsCfg *pCfg,
InstrAutoSamplingMode *pAutoSampling);
ConfigurationCreateFunction_1_0 ConfigurationCreate;
ConfigurationActivateFunction_1_0 ConfigurationActivate;
ConfigurationDeactivateFunction_1_0 ConfigurationDeactivate;
ConfigurationDeleteFunction_1_0 ConfigurationDelete;
bool instrEscHwMetricsEnable(
InstrEscCbData cbData,
bool enable);
bool instrEscLoadPmRegsCfg(
InstrEscCbData cbData,
InstrPmRegsCfg *pCfg,
bool hardwareAccess = 1);
bool instrEscSetPmRegsCfg(
InstrEscCbData cbData,
uint32_t count,
uint32_t *pOffsets,
uint32_t *pValues);
bool instrEscSendReadRegsCfg(
InstrEscCbData cbData,
uint32_t count,
uint32_t *pOffsets,
uint32_t *pBitSizes);
bool instrSetAvailable(bool enabled);
void instrEscVerifyEnable(
InstrEscCbData cbData);
uint32_t instrSetPlatformInfo(
uint32_t productId,
void *featureTable);
} // namespace NEO
GetDataFunction_1_0 GetData;
};
}; // namespace MetricsLibraryApi

View File

@@ -14,6 +14,8 @@ set(RUNTIME_SRCS_OS_INTERFACE_BASE
${CMAKE_CURRENT_SOURCE_DIR}/debug_settings_manager.h
${CMAKE_CURRENT_SOURCE_DIR}/device_factory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/device_factory.h
${CMAKE_CURRENT_SOURCE_DIR}/metrics_library.cpp
${CMAKE_CURRENT_SOURCE_DIR}/metrics_library.h
${CMAKE_CURRENT_SOURCE_DIR}/os_context.h
${CMAKE_CURRENT_SOURCE_DIR}/os_inc_base.h
${CMAKE_CURRENT_SOURCE_DIR}/os_interface.h

View File

@@ -44,6 +44,7 @@ set(RUNTIME_SRCS_OS_INTERFACE_LINUX
${CMAKE_CURRENT_SOURCE_DIR}/os_library.cpp
${CMAKE_CURRENT_SOURCE_DIR}/os_library.h
${CMAKE_CURRENT_SOURCE_DIR}/os_memory_linux.cpp
${CMAKE_CURRENT_SOURCE_DIR}/os_metrics_library.cpp
${CMAKE_CURRENT_SOURCE_DIR}/os_thread_linux.cpp
${CMAKE_CURRENT_SOURCE_DIR}/os_thread_linux.h
${CMAKE_CURRENT_SOURCE_DIR}/os_time_linux.cpp

View File

@@ -19,10 +19,6 @@ OSInterface::~OSInterface() {
delete osInterfaceImpl;
}
// Returns the hardware context id for this OS interface.
// This implementation has no real id to report and always returns 0.
uint32_t OSInterface::getHwContextId() const {
return 0;
}
// Reports whether 64KB pages are enabled by returning the value of osEnabled64kbPages.
bool OSInterface::are64kbPagesEnabled() {
return osEnabled64kbPages;
}

View File

@@ -0,0 +1,51 @@
/*
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "runtime/os_interface/metrics_library.h"
namespace NEO {
//////////////////////////////////////////////////////
// FUNCTION: MetricsLibrary::oaConfigurationActivate
//////////////////////////////////////////////////////
bool MetricsLibrary::oaConfigurationActivate(
const ConfigurationHandle_1_0 &handle) {
ConfigurationActivateData_1_0 data = {};
data.Type = GpuConfigurationActivationType::Tbs;
return api->functions.ConfigurationActivate(
handle,
&data) == StatusCode::Success;
}
//////////////////////////////////////////////////////
// FUNCTION: MetricsLibrary::oaConfigurationDeactivate
//////////////////////////////////////////////////////
// Deactivates the given OA configuration through the Metrics Library function table.
// Returns true only when the library reports StatusCode::Success.
bool MetricsLibrary::oaConfigurationDeactivate(
    const ConfigurationHandle_1_0 &handle) {
    const auto status = api->functions.ConfigurationDeactivate(handle);
    return status == StatusCode::Success;
}
//////////////////////////////////////////////////////
// FUNCTION: MetricsLibrary::userConfigurationCreate
//////////////////////////////////////////////////////
// Linux stub for creating a user (mmio) configuration.
// Neither argument is used: `handle` is left untouched and no library call is made.
// Always returns true, so callers see the request as successful.
bool MetricsLibrary::userConfigurationCreate(
const ContextHandle_1_0 &context,
ConfigurationHandle_1_0 &handle) {
// Not supported on Linux.
return true;
}
//////////////////////////////////////////////////////
// FUNCTION: MetricsLibrary::userConfigurationDelete
//////////////////////////////////////////////////////
// Linux stub for deleting a user (mmio) configuration.
// The handle is ignored and no library call is made; always returns true.
bool MetricsLibrary::userConfigurationDelete(
const ConfigurationHandle_1_0 &handle) {
// Not supported on Linux.
return true;
}
} // namespace NEO

View File

@@ -7,66 +7,58 @@
#include "performance_counters_linux.h"
#include "runtime/device/device.h"
#include "runtime/helpers/hw_helper.h"
namespace NEO {
////////////////////////////////////////////////////
// PerformanceCounters::create
////////////////////////////////////////////////////
std::unique_ptr<PerformanceCounters> PerformanceCounters::create(Device *device) {
auto counter = std::make_unique<PerformanceCountersLinux>();
auto gen = device->getHardwareInfo().platform.eRenderCoreFamily;
auto &hwHelper = HwHelper::get(gen);
UNRECOVERABLE_IF(counter == nullptr);
std::unique_ptr<PerformanceCounters> PerformanceCounters::create(OSTime *osTime) {
return std::unique_ptr<PerformanceCounters>(new PerformanceCountersLinux(osTime));
}
PerformanceCountersLinux::PerformanceCountersLinux(OSTime *osTime) : PerformanceCounters(osTime) {
mdLibHandle = nullptr;
perfmonLoadConfigFunc = nullptr;
counter->clientType.Gen = static_cast<MetricsLibraryApi::ClientGen>(hwHelper.getMetricsLibraryGenId());
return counter;
}
PerformanceCountersLinux::~PerformanceCountersLinux() {
if (pAutoSamplingInterface) {
autoSamplingStopFunc(&pAutoSamplingInterface);
pAutoSamplingInterface = nullptr;
available = false;
}
//////////////////////////////////////////////////////
// PerformanceCountersLinux::enableCountersConfiguration
//////////////////////////////////////////////////////
bool PerformanceCountersLinux::enableCountersConfiguration() {
// Release previous counters configuration so the user
// can change configuration between kernels.
releaseCountersConfiguration();
if (mdLibHandle) {
dlcloseFunc(mdLibHandle);
mdLibHandle = nullptr;
}
}
void PerformanceCountersLinux::initialize(const HardwareInfo *hwInfo) {
PerformanceCounters::initialize(hwInfo);
mdLibHandle = dlopenFunc("libmd.so", RTLD_LAZY | RTLD_LOCAL);
if (mdLibHandle) {
perfmonLoadConfigFunc = reinterpret_cast<perfmonLoadConfig_t>(dlsymFunc(mdLibHandle, "drm_intel_perfmon_load_config"));
}
setPlatformInfoFunc(hwInfo->platform.eProductFamily, (void *)(&hwInfo->featureTable));
}
void PerformanceCountersLinux::enableImpl() {
if (mdLibHandle && perfmonLoadConfigFunc) {
PerformanceCounters::enableImpl();
}
}
bool PerformanceCountersLinux::verifyPmRegsCfg(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending) {
if (perfmonLoadConfigFunc == nullptr) {
// Create oa configuration.
if (!metricsLibrary->oaConfigurationCreate(
context,
oaConfiguration)) {
DEBUG_BREAK_IF(true);
return false;
}
if (PerformanceCounters::verifyPmRegsCfg(pCfg, pLastPmRegsCfgHandle, pLastPmRegsCfgPending)) {
return getPerfmonConfig(pCfg);
}
return false;
}
bool PerformanceCountersLinux::getPerfmonConfig(InstrPmRegsCfg *pCfg) {
unsigned int oaCfgHandle = pCfg->OaCounters.Handle;
unsigned int gpCfgHandle = pCfg->GpCounters.Handle;
int fd = osInterface->get()->getDrm()->getFileDescriptor();
if (perfmonLoadConfigFunc(fd, nullptr, &oaCfgHandle, &gpCfgHandle) != 0) {
return false;
}
if (pCfg->OaCounters.Handle != 0 && oaCfgHandle != pCfg->OaCounters.Handle) {
return false;
}
if (pCfg->GpCounters.Handle != 0 && gpCfgHandle != pCfg->GpCounters.Handle) {
// Enable oa configuration.
if (!metricsLibrary->oaConfigurationActivate(
oaConfiguration)) {
DEBUG_BREAK_IF(true);
return false;
}
return true;
}
//////////////////////////////////////////////////////
// PerformanceCountersLinux::releaseCountersConfiguration
//////////////////////////////////////////////////////
// Tears down the currently held OA configuration, if any:
// deactivates it, deletes it and marks the stored handle as invalid.
// Does nothing when no valid OA configuration is held.
void PerformanceCountersLinux::releaseCountersConfiguration() {
    if (!oaConfiguration.IsValid()) {
        return;
    }

    // Deactivate before delete; results of both calls are intentionally ignored.
    metricsLibrary->oaConfigurationDeactivate(oaConfiguration);
    metricsLibrary->oaConfigurationDelete(oaConfiguration);

    // Invalidate the handle so a later release is a no-op.
    oaConfiguration.data = nullptr;
}
} // namespace NEO

View File

@@ -8,35 +8,17 @@
#pragma once
#include "runtime/os_interface/performance_counters.h"
#include "os_interface.h"
#include <dlfcn.h>
typedef struct _drm_intel_context drm_intel_context;
namespace NEO {
class PerformanceCountersLinux : virtual public PerformanceCounters {
public:
PerformanceCountersLinux(OSTime *osTime);
~PerformanceCountersLinux() override;
void initialize(const HardwareInfo *hwInfo) override;
void enableImpl() override;
PerformanceCountersLinux() = default;
~PerformanceCountersLinux() override = default;
protected:
virtual bool getPerfmonConfig(InstrPmRegsCfg *pCfg);
bool verifyPmRegsCfg(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending) override;
typedef int (*perfmonLoadConfig_t)(int fd, drm_intel_context *ctx, uint32_t *oaCfgId, uint32_t *gpCfgId);
typedef void *(*dlopenFunc_t)(const char *, int);
typedef void *(*dlsymFunc_t)(void *, const char *);
void *mdLibHandle;
perfmonLoadConfig_t perfmonLoadConfigFunc;
dlopenFunc_t dlopenFunc = dlopen;
dlsymFunc_t dlsymFunc = dlsym;
decltype(&dlclose) dlcloseFunc = dlclose;
decltype(&instrSetPlatformInfo) setPlatformInfoFunc = instrSetPlatformInfo;
/////////////////////////////////////////////////////
// Gpu oa/mmio configuration.
/////////////////////////////////////////////////////
bool enableCountersConfiguration() override;
void releaseCountersConfiguration() override;
};
} // namespace NEO

View File

@@ -0,0 +1,188 @@
/*
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "runtime/os_interface/metrics_library.h"
#include "runtime/helpers/hw_helper.h"
#include "runtime/os_interface/os_inc_base.h"
namespace NEO {
///////////////////////////////////////////////////////
// FUNCTION: MetricsLibrary::MetricsLibrary
///////////////////////////////////////////////////////
// Allocates the (initially empty) interface tables and attempts to load the
// OS-specific Metrics Library binary named by Os::metricsLibraryDllName.
// Entry points are not resolved here; that happens in open().
MetricsLibrary::MetricsLibrary() {
    api = std::make_unique<MetricsLibraryInterface>();

    OsLibrary *loadedLibrary = OsLibrary::load(Os::metricsLibraryDllName);
    osLibrary.reset(loadedLibrary);
}
//////////////////////////////////////////////////////
// FUNCTION: MetricsLibrary::open
//////////////////////////////////////////////////////
// Resolves the context create/delete entry points from the loaded library.
// Aborts (UNRECOVERABLE_IF) when no OsLibrary object exists at all.
// When the library object reports it is not loaded, both entry points are
// cleared. Returns true only if both entry points were resolved.
bool MetricsLibrary::open() {
    UNRECOVERABLE_IF(osLibrary.get() == nullptr);

    const bool libraryLoaded = osLibrary->isLoaded();

    api->contextCreate = libraryLoaded
                             ? reinterpret_cast<ContextCreateFunction_1_0>(osLibrary->getProcAddress(METRICS_LIBRARY_CONTEXT_CREATE_1_0))
                             : nullptr;
    api->contextDelete = libraryLoaded
                             ? reinterpret_cast<ContextDeleteFunction_1_0>(osLibrary->getProcAddress(METRICS_LIBRARY_CONTEXT_DELETE_1_0))
                             : nullptr;

    // Both symbols are required for the library to be usable.
    return (api->contextCreate != nullptr) && (api->contextDelete != nullptr);
}
//////////////////////////////////////////////////////
// MetricsLibrary::contextCreate
//////////////////////////////////////////////////////
// Creates a Metrics Library context.
// Fills `createData` with pointers to the caller's client data and to this
// object's function/callback tables, then calls the library's context create
// entry point. On success the library writes the new context into `handle`.
// Returns true only when the library reports StatusCode::Success.
bool MetricsLibrary::contextCreate(
    const ClientType_1_0 &clientType,
    ClientData_1_0 &clientData,
    ContextCreateData_1_0 &createData,
    ContextHandle_1_0 &handle) {
    createData.ClientData = &clientData;
    createData.ClientCallbacks = &api->callbacks;
    createData.Api = &api->functions;

    const auto status = api->contextCreate(clientType, &createData, &handle);
    return status == StatusCode::Success;
}
//////////////////////////////////////////////////////
// MetricsLibrary::contextDelete
//////////////////////////////////////////////////////
// Destroys a Metrics Library context through the library's context delete entry point.
// Returns true only when the library reports StatusCode::Success.
bool MetricsLibrary::contextDelete(
    const ContextHandle_1_0 &handle) {
    const auto status = api->contextDelete(handle);
    return status == StatusCode::Success;
}
//////////////////////////////////////////////////////
// MetricsLibrary::hwCountersCreate
//////////////////////////////////////////////////////
bool MetricsLibrary::hwCountersCreate(
const ContextHandle_1_0 &context,
const uint32_t slots,
const ConfigurationHandle_1_0 user,
QueryHandle_1_0 &query) {
QueryCreateData_1_0 data = {};
data.HandleContext = context;
data.Type = ObjectType::QueryHwCounters;
data.Slots = slots;
return api->functions.QueryCreate(
&data,
&query) == StatusCode::Success;
}
//////////////////////////////////////////////////////
// MetricsLibrary::hwCountersDelete
//////////////////////////////////////////////////////
// Deletes a hw counters query object through the Metrics Library function table.
// Returns true only when the library reports StatusCode::Success.
bool MetricsLibrary::hwCountersDelete(
    const QueryHandle_1_0 &query) {
    const auto status = api->functions.QueryDelete(query);
    return status == StatusCode::Success;
}
//////////////////////////////////////////////////////
// MetricsLibrary::hwCountersGetReport
//////////////////////////////////////////////////////
bool MetricsLibrary::hwCountersGetReport(
const QueryHandle_1_0 &handle,
const uint32_t slot,
const uint32_t slotsCount,
const uint32_t dataSize,
void *data) {
GetReportData_1_0 report = {};
report.Type = ObjectType::QueryHwCounters;
report.Query.Handle = handle;
report.Query.Slot = slot;
report.Query.SlotsCount = slotsCount;
report.Query.Data = data;
report.Query.DataSize = dataSize;
return api->functions.GetData(&report) == StatusCode::Success;
}
//////////////////////////////////////////////////////
// MetricsLibrary::hwCountersGetApiReportSize
//////////////////////////////////////////////////////
uint32_t MetricsLibrary::hwCountersGetApiReportSize() {
ValueType type = ValueType::Uint32;
TypedValue_1_0 value = {};
return api->functions.GetParameter(ParameterType::QueryHwCountersReportApiSize, &type, &value) == StatusCode::Success
? value.ValueUInt32
: 0;
}
//////////////////////////////////////////////////////
// MetricsLibrary::hwCountersGetGpuReportSize
//////////////////////////////////////////////////////
uint32_t MetricsLibrary::hwCountersGetGpuReportSize() {
ValueType type = ValueType::Uint32;
TypedValue_1_0 value = {};
return api->functions.GetParameter(ParameterType::QueryHwCountersReportGpuSize, &type, &value) == StatusCode::Success
? value.ValueUInt32
: 0;
}
//////////////////////////////////////////////////////
// MetricsLibrary::commandBufferGet
//////////////////////////////////////////////////////
// Forwards the command buffer request described by `data` to the
// Metrics Library's CommandBufferGet function.
// Returns true only when the library reports StatusCode::Success.
bool MetricsLibrary::commandBufferGet(
    CommandBufferData_1_0 &data) {
    const auto status = api->functions.CommandBufferGet(&data);
    return status == StatusCode::Success;
}
//////////////////////////////////////////////////////
// MetricsLibrary::commandBufferGetSize
//////////////////////////////////////////////////////
// Queries the Metrics Library for the size requirements of the command buffer
// described by `commandBufferData`; the library writes them to `commandBufferSize`.
// Returns true only when the library reports StatusCode::Success.
bool MetricsLibrary::commandBufferGetSize(
    const CommandBufferData_1_0 &commandBufferData,
    CommandBufferSize_1_0 &commandBufferSize) {
    const auto status = api->functions.CommandBufferGetSize(&commandBufferData, &commandBufferSize);
    return status == StatusCode::Success;
}
//////////////////////////////////////////////////////
// MetricsLibrary::oaConfigurationCreate
//////////////////////////////////////////////////////
bool MetricsLibrary::oaConfigurationCreate(
const ContextHandle_1_0 &context,
ConfigurationHandle_1_0 &handle) {
ConfigurationCreateData_1_0 data = {};
data.HandleContext = context;
data.Type = ObjectType::ConfigurationHwCountersOa;
return api->functions.ConfigurationCreate(
&data,
&handle) == StatusCode::Success;
}
//////////////////////////////////////////////////////
// MetricsLibrary::oaConfigurationDelete
//////////////////////////////////////////////////////
// Deletes an OA configuration through the Metrics Library function table.
// Returns true only when the library reports StatusCode::Success.
bool MetricsLibrary::oaConfigurationDelete(
    const ConfigurationHandle_1_0 &handle) {
    const auto status = api->functions.ConfigurationDelete(handle);
    return status == StatusCode::Success;
}
} // namespace NEO

View File

@@ -0,0 +1,89 @@
/*
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "runtime/os_interface/os_library.h"
#include "instrumentation.h"
#include <memory>
namespace NEO {
using MetricsLibraryApi::ClientApi;
using MetricsLibraryApi::ClientCallbacks_1_0;
using MetricsLibraryApi::ClientData_1_0;
using MetricsLibraryApi::ClientGen;
using MetricsLibraryApi::ClientType_1_0;
using MetricsLibraryApi::CommandBufferData_1_0;
using MetricsLibraryApi::CommandBufferSize_1_0;
using MetricsLibraryApi::ConfigurationActivateData_1_0;
using MetricsLibraryApi::ConfigurationCreateData_1_0;
using MetricsLibraryApi::ConfigurationHandle_1_0;
using MetricsLibraryApi::ContextCreateData_1_0;
using MetricsLibraryApi::ContextCreateFunction_1_0;
using MetricsLibraryApi::ContextDeleteFunction_1_0;
using MetricsLibraryApi::ContextHandle_1_0;
using MetricsLibraryApi::GetReportData_1_0;
using MetricsLibraryApi::GpuConfigurationActivationType;
using MetricsLibraryApi::GpuMemory_1_0;
using MetricsLibraryApi::Interface_1_0;
using MetricsLibraryApi::ObjectType;
using MetricsLibraryApi::ParameterType;
using MetricsLibraryApi::QueryCreateData_1_0;
using MetricsLibraryApi::QueryHandle_1_0;
using MetricsLibraryApi::StatusCode;
using MetricsLibraryApi::TypedValue_1_0;
using MetricsLibraryApi::ValueType;
// Bundle of Metrics Library entry points and tables used by MetricsLibrary.
class MetricsLibraryInterface {
public:
// Context create/delete entry points. They start as nullptr and are
// resolved from the loaded library in MetricsLibrary::open().
ContextCreateFunction_1_0 contextCreate = nullptr;
ContextDeleteFunction_1_0 contextDelete = nullptr;
// Function table; starts zero-initialised and its address is handed to the
// library (ContextCreateData_1_0::Api) in MetricsLibrary::contextCreate().
Interface_1_0 functions = {};
// Callback table; its address is handed to the library
// (ContextCreateData_1_0::ClientCallbacks) in MetricsLibrary::contextCreate().
ClientCallbacks_1_0 callbacks = {};
};
// Wrapper around the dynamically loaded Metrics Library.
// Every bool-returning call reports true only when the underlying library
// call returned StatusCode::Success (the Linux user-configuration stubs
// always report true). Methods are MOCKABLE_VIRTUAL so that tests can
// substitute their own implementation.
class MetricsLibrary {
  public:
    // Allocates the interface tables and attempts to load the library binary.
    MetricsLibrary();
    MOCKABLE_VIRTUAL ~MetricsLibrary() = default;

    // Resolves the context create/delete entry points from the loaded library.
    MOCKABLE_VIRTUAL bool open();

    // Context lifetime.
    MOCKABLE_VIRTUAL bool contextCreate(const ClientType_1_0 &client, ClientData_1_0 &clientData, ContextCreateData_1_0 &createData, ContextHandle_1_0 &handle);
    MOCKABLE_VIRTUAL bool contextDelete(const ContextHandle_1_0 &handle);

    // Hw counters query: creation, deletion and report retrieval.
    MOCKABLE_VIRTUAL bool hwCountersCreate(const ContextHandle_1_0 &context, const uint32_t slots, const ConfigurationHandle_1_0 mmio, QueryHandle_1_0 &handle);
    MOCKABLE_VIRTUAL bool hwCountersDelete(const QueryHandle_1_0 &handle);
    MOCKABLE_VIRTUAL bool hwCountersGetReport(const QueryHandle_1_0 &handle, const uint32_t slot, const uint32_t slotsCount, const uint32_t dataSize, void *data);

    // Report sizes as reported by the library; 0 when the query fails.
    MOCKABLE_VIRTUAL uint32_t hwCountersGetApiReportSize();
    MOCKABLE_VIRTUAL uint32_t hwCountersGetGpuReportSize();

    // Oa configuration: create/delete and activate/deactivate.
    MOCKABLE_VIRTUAL bool oaConfigurationCreate(const ContextHandle_1_0 &context, ConfigurationHandle_1_0 &handle);
    MOCKABLE_VIRTUAL bool oaConfigurationDelete(const ConfigurationHandle_1_0 &handle);
    MOCKABLE_VIRTUAL bool oaConfigurationActivate(const ConfigurationHandle_1_0 &handle);
    MOCKABLE_VIRTUAL bool oaConfigurationDeactivate(const ConfigurationHandle_1_0 &handle);

    // User mmio configuration.
    MOCKABLE_VIRTUAL bool userConfigurationCreate(const ContextHandle_1_0 &context, ConfigurationHandle_1_0 &handle);
    MOCKABLE_VIRTUAL bool userConfigurationDelete(const ConfigurationHandle_1_0 &handle);

    // Gpu command buffer: contents and required size.
    MOCKABLE_VIRTUAL bool commandBufferGet(CommandBufferData_1_0 &data);
    MOCKABLE_VIRTUAL bool commandBufferGetSize(const CommandBufferData_1_0 &commandBufferData, CommandBufferSize_1_0 &commandBufferSize);

    // Loaded library binary and the entry points / tables used to talk to it.
    std::unique_ptr<OsLibrary> osLibrary;
    std::unique_ptr<MetricsLibraryInterface> api;
};
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2018 Intel Corporation
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -18,4 +18,7 @@ extern const char *testDllName;
extern const char *fileSeparator;
// Pci Path
extern const char *sysFsPciPath;
// Os specific Metrics Library name
extern const char *metricsLibraryDllName;
}; // namespace Os

View File

@@ -21,7 +21,6 @@ class OSInterface {
// Returns the stored osInterfaceImpl pointer as-is (no ownership transfer is performed here).
OSInterfaceImpl *get() const {
return osInterfaceImpl;
};
unsigned int getHwContextId() const;
static bool osEnabled64kbPages;
static bool osEnableLocalMemory;
static bool are64kbPagesEnabled();

View File

@@ -7,175 +7,240 @@
#include "runtime/os_interface/performance_counters.h"
#include "runtime/helpers/debug_helpers.h"
#include "runtime/os_interface/os_interface.h"
#include "runtime/os_interface/os_time.h"
#include "runtime/utilities/tag_allocator.h"
#include "CL/cl.h"
using namespace MetricsLibraryApi;
namespace NEO {
decltype(&instrGetPerfCountersQueryData) getPerfCountersQueryDataFactory[IGFX_MAX_CORE] = {
nullptr,
};
size_t perfCountersQuerySize[IGFX_MAX_CORE] = {
0,
};
PerformanceCounters::PerformanceCounters(OSTime *osTime) {
this->osTime = osTime;
DEBUG_BREAK_IF(osTime == nullptr);
gfxFamily = IGFX_UNKNOWN_CORE;
cbData = {
0,
};
this->osInterface = osTime->getOSInterface();
hwMetricsEnabled = false;
useMIRPC = false;
pAutoSamplingInterface = nullptr;
cpuRawTimestamp = 0;
refCounter = 0;
available = false;
reportId = 0;
//////////////////////////////////////////////////////
// PerformanceCounters constructor.
//////////////////////////////////////////////////////
// Creates the default MetricsLibrary wrapper owned by this object.
// Aborts (UNRECOVERABLE_IF) if the wrapper pointer is null afterwards.
PerformanceCounters::PerformanceCounters()
    : metricsLibrary(std::make_unique<MetricsLibrary>()) {
    UNRECOVERABLE_IF(metricsLibrary.get() == nullptr);
}
//////////////////////////////////////////////////////
// PerformanceCounters::getReferenceNumber
//////////////////////////////////////////////////////
// Returns the current enable() reference count.
// The read is done under the same mutex that enable()/shutdown() hold.
uint32_t PerformanceCounters::getReferenceNumber() {
    const std::lock_guard<std::mutex> guard(mutex);
    return referenceCounter;
}
//////////////////////////////////////////////////////
// PerformanceCounters::isAvailable
//////////////////////////////////////////////////////
// Returns the `available` flag, which enable() sets from the result of
// openMetricsLibrary() and shutdown() clears.
// NOTE(review): the flag is read here without taking `mutex` — confirm callers do not race with enable()/shutdown().
bool PerformanceCounters::isAvailable() {
return available;
}
//////////////////////////////////////////////////////
// PerformanceCounters::enable
//////////////////////////////////////////////////////
void PerformanceCounters::enable() {
mutex.lock();
std::lock_guard<std::mutex> lg(mutex, std::adopt_lock);
if (refCounter == 0) {
enableImpl();
std::lock_guard<std::mutex> lockMutex(mutex);
if (referenceCounter == 0) {
available = openMetricsLibrary();
}
refCounter++;
referenceCounter++;
}
//////////////////////////////////////////////////////
// PerformanceCounters::shutdown
//////////////////////////////////////////////////////
void PerformanceCounters::shutdown() {
mutex.lock();
std::lock_guard<std::mutex> lg(mutex, std::adopt_lock);
if (refCounter >= 1) {
if (refCounter == 1) {
shutdownImpl();
std::lock_guard<std::mutex> lockMutex(mutex);
if (referenceCounter >= 1) {
if (referenceCounter == 1) {
available = false;
closeMetricsLibrary();
}
refCounter--;
referenceCounter--;
}
}
void PerformanceCounters::initialize(const HardwareInfo *hwInfo) {
useMIRPC = !(hwInfo->workaroundTable.waDoNotUseMIReportPerfCount);
gfxFamily = hwInfo->platform.eRenderCoreFamily;
//////////////////////////////////////////////////////
// PerformanceCounters::getMetricsLibraryInterface
//////////////////////////////////////////////////////
// Returns a non-owning pointer to the MetricsLibrary wrapper held by this object.
MetricsLibrary *PerformanceCounters::getMetricsLibraryInterface() {
return metricsLibrary.get();
}
if (getPerfCountersQueryDataFactory[gfxFamily] != nullptr) {
getPerfCountersQueryDataFunc = getPerfCountersQueryDataFactory[gfxFamily];
} else {
perfCountersQuerySize[gfxFamily] = sizeof(GTDI_QUERY);
//////////////////////////////////////////////////////
// PerformanceCounters::setMetricsLibraryInterface
//////////////////////////////////////////////////////
// Replaces the MetricsLibrary wrapper held by this object, taking ownership
// of `newMetricsLibrary`. The previously held wrapper is destroyed by the
// unique_ptr move assignment.
void PerformanceCounters::setMetricsLibraryInterface(std::unique_ptr<MetricsLibrary> newMetricsLibrary) {
metricsLibrary = std::move(newMetricsLibrary);
}
//////////////////////////////////////////////////////
// PerformanceCounters::getMetricsLibraryContext
//////////////////////////////////////////////////////
// Returns a copy of the stored Metrics Library context handle.
// The handle is filled in by openMetricsLibrary(); validity is not checked here.
ContextHandle_1_0 PerformanceCounters::getMetricsLibraryContext() {
return context;
}
//////////////////////////////////////////////////////
// PerformanceCounters::openMetricsLibrary
//////////////////////////////////////////////////////
// Opens the Metrics Library and creates its context.
// Returns true when both steps succeed. On any failure whatever was created
// so far is released through closeMetricsLibrary() and false is returned.
bool PerformanceCounters::openMetricsLibrary() {

    // Open metrics library.
    bool result = metricsLibrary->open();
    DEBUG_BREAK_IF(!result);

    // Create metrics library context.
    if (result) {
        result = metricsLibrary->contextCreate(
            clientType,
            clientData,
            contextData,
            context);

        // Validate gpu report size, but only once the context exists.
        // The library's function table starts zero-initialised and is handed
        // to the library in contextCreate(), so after a failed context
        // creation the query could go through an unset function pointer.
        DEBUG_BREAK_IF(result && !metricsLibrary->hwCountersGetGpuReportSize());
    }

    // Error handling.
    if (!result) {
        closeMetricsLibrary();
    }

    return result;
}
//////////////////////////////////////////////////////
// PerformanceCounters::closeMetricsLibrary
//////////////////////////////////////////////////////
// Releases everything obtained from the Metrics Library: the counters
// configuration, the hw counters query and the context, in that order.
// Safe to call repeatedly: each handle is invalidated once its object has
// been deleted, so a later call (or a later enable/shutdown cycle) neither
// deletes it again nor hands out a stale handle.
void PerformanceCounters::closeMetricsLibrary() {
    // Destroy oa/user mmio configuration.
    releaseCountersConfiguration();

    // Destroy hw counters query.
    if (query.IsValid()) {
        metricsLibrary->hwCountersDelete(query);
        // Reset to the value-initialised (invalid) state so that
        // getQueryHandle() creates a fresh query next time.
        query = {};
    }

    // Destroy metrics library context.
    if (context.IsValid()) {
        metricsLibrary->contextDelete(context);
        // Drop the stale context handle as well.
        context = {};
    }
}
void PerformanceCounters::enableImpl() {
hwMetricsEnabled = hwMetricsEnableFunc(cbData, true);
if (!pAutoSamplingInterface && hwMetricsEnabled) {
autoSamplingStartFunc(cbData, &pAutoSamplingInterface);
if (pAutoSamplingInterface) {
available = true;
//////////////////////////////////////////////////////
// PerformanceCounters::getQueryHandle
//////////////////////////////////////////////////////
// Returns the hw counters query handle, creating a single-slot query on
// first use. The creation result is not checked beyond a debug break, so the
// returned handle may be invalid if creation failed.
QueryHandle_1_0 PerformanceCounters::getQueryHandle() {
    const bool queryMissing = !query.IsValid();

    if (queryMissing) {
        // One slot is requested for the query.
        const uint32_t slotsCount = 1;
        metricsLibrary->hwCountersCreate(context, slotsCount, userConfiguration, query);
    }

    DEBUG_BREAK_IF(!query.IsValid());
    return query;
}
//////////////////////////////////////////////////////
// PerformanceCounters::getGpuCommandsSize
//////////////////////////////////////////////////////
uint32_t PerformanceCounters::getGpuCommandsSize(
const bool begin) {
CommandBufferData_1_0 bufferData = {};
CommandBufferSize_1_0 bufferSize = {};
if (begin) {
// Load currently activated (through metrics discovery) oa/user mmio configuration and use it.
// It will allow to change counters configuration between subsequent clEnqueueNDCommandRange calls.
if (!enableCountersConfiguration()) {
return 0;
}
}
bufferData.HandleContext = context;
bufferData.Type = GpuCommandBufferType::Render;
bufferData.CommandsType = ObjectType::QueryHwCounters;
bufferData.QueryHwCounters.Begin = begin;
bufferData.QueryHwCounters.Handle = getQueryHandle();
bufferData.QueryHwCounters.HandleUserConfiguration = userConfiguration;
return metricsLibrary->commandBufferGetSize(bufferData, bufferSize)
? bufferSize.GpuMemorySize
: 0;
}
void PerformanceCounters::shutdownImpl() {
if (hwMetricsEnabled) {
hwMetricsEnableFunc(cbData, false);
hwMetricsEnabled = false;
}
if (pAutoSamplingInterface) {
autoSamplingStopFunc(&pAutoSamplingInterface);
pAutoSamplingInterface = nullptr;
available = false;
}
//////////////////////////////////////////////////////
// PerformanceCounters::getGpuCommands
//////////////////////////////////////////////////////
// Asks the Metrics Library to write the hw counters query commands (begin or
// end part, selected by `begin`) into the caller's buffer.
//   performanceCounters - tag node whose cpu/gpu addresses are passed as the
//                         query's memory allocation
//   bufferSize, pBuffer - destination command buffer and its size
// Returns the result of MetricsLibrary::commandBufferGet().
bool PerformanceCounters::getGpuCommands(
    TagNode<HwPerfCounter> &performanceCounters,
    const bool begin,
    const uint32_t bufferSize,
    void *pBuffer) {
    CommandBufferData_1_0 commandRequest = {};

    // What to generate and for which context.
    commandRequest.HandleContext = context;
    commandRequest.Type = GpuCommandBufferType::Render;
    commandRequest.CommandsType = ObjectType::QueryHwCounters;

    // Where to write the commands.
    commandRequest.Data = pBuffer;
    commandRequest.Size = bufferSize;

    // Memory backing the query hw counters.
    commandRequest.Allocation.CpuAddress = reinterpret_cast<uint8_t *>(performanceCounters.tagForCpuAccess);
    commandRequest.Allocation.GpuAddress = performanceCounters.getGpuAddress();

    // Query hw counters specific data.
    commandRequest.QueryHwCounters.Begin = begin;
    commandRequest.QueryHwCounters.Handle = getQueryHandle();
    commandRequest.QueryHwCounters.HandleUserConfiguration = userConfiguration;

    const bool commandsWritten = metricsLibrary->commandBufferGet(commandRequest);
    return commandsWritten;
}
void PerformanceCounters::setCpuTimestamp() {
cpuRawTimestamp = osTime->getCpuRawTimestamp();
//////////////////////////////////////////////////////
// PerformanceCounters::getApiReportSize
//////////////////////////////////////////////////////
// Forwards to MetricsLibrary::hwCountersGetApiReportSize() and returns its result unchanged.
uint32_t PerformanceCounters::getApiReportSize() {
return metricsLibrary->hwCountersGetApiReportSize();
}
InstrPmRegsCfg *PerformanceCounters::getPmRegsCfg(uint32_t configuration) {
if (!hwMetricsEnabled) {
return nullptr;
//////////////////////////////////////////////////////
// PerformanceCounters::getGpuReportSize
//////////////////////////////////////////////////////
// Forwards to MetricsLibrary::hwCountersGetGpuReportSize() and returns its result unchanged.
uint32_t PerformanceCounters::getGpuReportSize() {
return metricsLibrary->hwCountersGetGpuReportSize();
}
//////////////////////////////////////////////////////
// PerformanceCounters::getApiReport
//////////////////////////////////////////////////////
bool PerformanceCounters::getApiReport(const size_t inputParamSize, void *pInputParam, size_t *pOutputParamSize, bool isEventComplete) {
const uint32_t outputSize = metricsLibrary->hwCountersGetApiReportSize();
if (pOutputParamSize) {
*pOutputParamSize = outputSize;
}
switch (configuration) {
case GTDI_CONFIGURATION_SET_DYNAMIC:
case GTDI_CONFIGURATION_SET_1:
case GTDI_CONFIGURATION_SET_2:
case GTDI_CONFIGURATION_SET_3:
break;
default:
return nullptr;
}
InstrPmRegsCfg *pPmRegsCfg = new InstrPmRegsCfg();
pPmRegsCfg->OaCounters.Handle = INSTR_PM_REGS_CFG_INVALID;
mutex.lock();
std::lock_guard<std::mutex> lg(mutex, std::adopt_lock);
if (getPmRegsCfgFunc(cbData, configuration, pPmRegsCfg, nullptr)) {
return pPmRegsCfg;
}
delete pPmRegsCfg;
return nullptr;
}
bool PerformanceCounters::verifyPmRegsCfg(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending) {
if (pCfg == nullptr || pLastPmRegsCfgHandle == nullptr || pLastPmRegsCfgPending == nullptr) {
return false;
}
if (checkPmRegsCfgFunc(pCfg, pLastPmRegsCfgHandle, pAutoSamplingInterface)) {
if (loadPmRegsCfgFunc(cbData, pCfg, 1)) {
return true;
}
}
return false;
}
bool PerformanceCounters::sendPmRegsCfgCommands(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending) {
if (verifyPmRegsCfg(pCfg, pLastPmRegsCfgHandle, pLastPmRegsCfgPending)) {
*pLastPmRegsCfgPending = true;
if (pInputParam == nullptr && inputParamSize == 0 && pOutputParamSize) {
return true;
}
return false;
}
bool PerformanceCounters::processEventReport(size_t inputParamSize, void *inputParam, size_t *outputParamSize, HwPerfCounter *pPrivateData, InstrPmRegsCfg *countersConfiguration, bool isEventComplete) {
size_t outputSize = perfCountersQuerySize[gfxFamily];
if (outputParamSize) {
*outputParamSize = outputSize;
}
if (inputParam == nullptr && inputParamSize == 0 && outputParamSize) {
return true;
}
if (inputParam == nullptr || isEventComplete == false) {
if (pInputParam == nullptr || isEventComplete == false) {
return false;
}
if (inputParamSize < outputSize) {
return false;
}
GTDI_QUERY *pClientData = static_cast<GTDI_QUERY *>(inputParam);
getPerfCountersQueryDataFunc(cbData, pClientData, &pPrivateData->HWPerfCounters,
cpuRawTimestamp, pAutoSamplingInterface, countersConfiguration, useMIRPC, true, nullptr);
return true;
}
int PerformanceCounters::sendPerfConfiguration(uint32_t count, uint32_t *pOffsets, uint32_t *pValues) {
bool ret = false;
if (count == 0 || pOffsets == NULL || pValues == NULL) {
return CL_INVALID_VALUE;
}
mutex.lock();
std::lock_guard<std::mutex> lg(mutex, std::adopt_lock);
if (pOffsets[0] != INSTR_READ_REGS_CFG_TAG) {
ret = setPmRegsCfgFunc(cbData, count, pOffsets, pValues);
} else if (count > 1) {
ret = sendReadRegsCfgFunc(cbData, count - 1, pOffsets + 1, pValues + 1);
}
return ret ? CL_SUCCESS : CL_PROFILING_INFO_NOT_AVAILABLE;
}
uint32_t PerformanceCounters::getCurrentReportId() {
return (osInterface->getHwContextId() << 12) | getReportId();
return metricsLibrary->hwCountersGetReport(query, 0, 1, outputSize, pInputParam);
}
} // namespace NEO

View File

@@ -7,71 +7,105 @@
#pragma once
#include "runtime/event/perf_counter.h"
#include "runtime/helpers/hw_info.h"
#include "runtime/os_interface/metrics_library.h"
#include "CL/cl.h"
#include <memory>
#include <mutex>
namespace NEO {
struct HardwareInfo;
class OSInterface;
class OSTime;
//////////////////////////////////////////////////////
// Forward declaration.
//////////////////////////////////////////////////////
template <typename Node>
struct TagNode;
//////////////////////////////////////////////////////
// Performance counters implementation.
//////////////////////////////////////////////////////
class PerformanceCounters {
public:
static std::unique_ptr<PerformanceCounters> create(OSTime *osTime);
//////////////////////////////////////////////////////
// Constructor/destructor.
//////////////////////////////////////////////////////
PerformanceCounters();
virtual ~PerformanceCounters() = default;
//////////////////////////////////////////////////////
// Performance counters creation.
//////////////////////////////////////////////////////
static std::unique_ptr<PerformanceCounters> create(class Device *device);
void enable();
void shutdown();
virtual void initialize(const HardwareInfo *hwInfo);
InstrPmRegsCfg *getPmRegsCfg(uint32_t configuration);
bool sendPmRegsCfgCommands(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending);
void setCpuTimestamp();
bool processEventReport(size_t pClientDataSize, void *pClientData, size_t *outputSize, HwPerfCounter *pPrivateData, InstrPmRegsCfg *countersConfiguration, bool isEventComplete);
int sendPerfConfiguration(uint32_t count, uint32_t *pOffsets, uint32_t *pValues);
uint32_t getCurrentReportId();
bool isAvailable();
uint32_t getReferenceNumber();
uint32_t getPerfCountersReferenceNumber() {
mutex.lock();
std::lock_guard<std::mutex> lg(mutex, std::adopt_lock);
/////////////////////////////////////////////////////
// Gpu oa/mmio configuration.
/////////////////////////////////////////////////////
virtual bool enableCountersConfiguration() = 0;
virtual void releaseCountersConfiguration() = 0;
return refCounter;
}
//////////////////////////////////////////////////////
// Gpu commands.
//////////////////////////////////////////////////////
uint32_t getGpuCommandsSize(const bool begin);
bool getGpuCommands(TagNode<HwPerfCounter> &performanceCounters, const bool begin, const uint32_t bufferSize, void *pBuffer);
bool isAvailable() {
return available;
}
/////////////////////////////////////////////////////
// Gpu/Api reports.
/////////////////////////////////////////////////////
uint32_t getApiReportSize();
uint32_t getGpuReportSize();
bool getApiReport(const size_t inputParamSize, void *pClientData, size_t *pOutputSize, bool isEventComplete);
/////////////////////////////////////////////////////
// Metrics Library interface.
/////////////////////////////////////////////////////
MetricsLibrary *getMetricsLibraryInterface();
void setMetricsLibraryInterface(std::unique_ptr<MetricsLibrary> newMetricsLibrary);
bool openMetricsLibrary();
void closeMetricsLibrary();
/////////////////////////////////////////////////////
// Metrics Library context/query handles.
/////////////////////////////////////////////////////
ContextHandle_1_0 getMetricsLibraryContext();
QueryHandle_1_0 getQueryHandle();
protected:
PerformanceCounters(OSTime *osTime);
virtual bool verifyPmRegsCfg(InstrPmRegsCfg *pCfg, uint32_t *pLastPmRegsCfgHandle, bool *pLastPmRegsCfgPending);
virtual void enableImpl();
void shutdownImpl();
MOCKABLE_VIRTUAL uint32_t getReportId() {
return ++reportId & 0xFFF;
}
GFXCORE_FAMILY gfxFamily;
InstrEscCbData cbData;
OSInterface *osInterface;
OSTime *osTime;
bool hwMetricsEnabled;
bool useMIRPC;
void *pAutoSamplingInterface;
uint64_t cpuRawTimestamp;
/////////////////////////////////////////////////////
// Common members.
/////////////////////////////////////////////////////
std::mutex mutex;
uint32_t refCounter;
bool available;
uint32_t reportId;
decltype(&instrAutoSamplingStart) autoSamplingStartFunc = instrAutoSamplingStart;
decltype(&instrAutoSamplingStop) autoSamplingStopFunc = instrAutoSamplingStop;
decltype(&instrCheckPmRegsCfg) checkPmRegsCfgFunc = instrCheckPmRegsCfg;
decltype(&instrGetPerfCountersQueryData) getPerfCountersQueryDataFunc = instrGetPerfCountersQueryData;
decltype(&instrEscGetPmRegsCfg) getPmRegsCfgFunc = instrEscGetPmRegsCfg;
decltype(&instrEscHwMetricsEnable) hwMetricsEnableFunc = instrEscHwMetricsEnable;
decltype(&instrEscLoadPmRegsCfg) loadPmRegsCfgFunc = instrEscLoadPmRegsCfg;
decltype(&instrEscSetPmRegsCfg) setPmRegsCfgFunc = instrEscSetPmRegsCfg;
decltype(&instrEscSendReadRegsCfg) sendReadRegsCfgFunc = instrEscSendReadRegsCfg;
uint32_t referenceCounter = 0;
bool available = false;
/////////////////////////////////////////////////////
// Metrics Library interface.
/////////////////////////////////////////////////////
std::unique_ptr<MetricsLibrary> metricsLibrary = {};
/////////////////////////////////////////////////////
// Metrics Library client data.
/////////////////////////////////////////////////////
ClientData_1_0 clientData = {};
ClientType_1_0 clientType = {ClientApi::OpenCL, ClientGen::Unknown};
/////////////////////////////////////////////////////
// Metrics Library context.
/////////////////////////////////////////////////////
ContextCreateData_1_0 contextData = {};
ContextHandle_1_0 context = {};
/////////////////////////////////////////////////////
// Metrics Library oa/mmio counters configuration.
/////////////////////////////////////////////////////
ConfigurationHandle_1_0 oaConfiguration = {};
ConfigurationHandle_1_0 userConfiguration = {};
/////////////////////////////////////////////////////
// Metrics Library query object.
/////////////////////////////////////////////////////
QueryHandle_1_0 query = {};
};
} // namespace NEO

View File

@@ -36,6 +36,7 @@ set(RUNTIME_SRCS_OS_INTERFACE_WINDOWS
${CMAKE_CURRENT_SOURCE_DIR}/os_library.cpp
${CMAKE_CURRENT_SOURCE_DIR}/os_library.h
${CMAKE_CURRENT_SOURCE_DIR}/os_memory_win.cpp
${CMAKE_CURRENT_SOURCE_DIR}/os_metrics_library.cpp
${CMAKE_CURRENT_SOURCE_DIR}/os_socket.h
${CMAKE_CURRENT_SOURCE_DIR}/os_thread_win.cpp
${CMAKE_CURRENT_SOURCE_DIR}/os_thread_win.h

View File

@@ -22,10 +22,6 @@ OSInterface::~OSInterface() {
delete osInterfaceImpl;
}
// Forwards the hardware context id query to the os specific implementation.
uint32_t OSInterface::getHwContextId() const {
    const auto hwContextId = osInterfaceImpl->getHwContextId();
    return hwContextId;
}
// Returns the device handle reported by the os specific implementation,
// converted to a 32 bit unsigned value.
uint32_t OSInterface::getDeviceHandle() const {
    const auto deviceHandle = osInterfaceImpl->getDeviceHandle();
    return static_cast<uint32_t>(deviceHandle);
}

View File

@@ -0,0 +1,55 @@
/*
* Copyright (C) 2017-2019 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "runtime/os_interface/metrics_library.h"
namespace NEO {
//////////////////////////////////////////////////////
// FUNCTION: MetricsLibrary::oaConfigurationActivate
//////////////////////////////////////////////////////
bool MetricsLibrary::oaConfigurationActivate(
const ConfigurationHandle_1_0 &handle) {
ConfigurationActivateData_1_0 data = {};
data.Type = GpuConfigurationActivationType::EscapeCode;
return api->functions.ConfigurationActivate(
handle,
&data) == StatusCode::Success;
}
//////////////////////////////////////////////////////
// FUNCTION: MetricsLibrary::oaConfigurationDeactivate
//////////////////////////////////////////////////////
// Deactivates the given oa configuration through the Metrics Library api.
// Returns true only when the library reports StatusCode::Success.
bool MetricsLibrary::oaConfigurationDeactivate(const ConfigurationHandle_1_0 &handle) {
    const auto status = api->functions.ConfigurationDeactivate(handle);
    return status == StatusCode::Success;
}
//////////////////////////////////////////////////////
// FUNCTION: MetricsLibrary::userConfigurationCreate
//////////////////////////////////////////////////////
bool MetricsLibrary::userConfigurationCreate(
const ContextHandle_1_0 &context,
ConfigurationHandle_1_0 &handle) {
ConfigurationCreateData_1_0 data = {};
data.HandleContext = context;
data.Type = ObjectType::ConfigurationHwCountersUser;
return api->functions.ConfigurationCreate(
&data,
&handle) == StatusCode::Success;
}
//////////////////////////////////////////////////////
// FUNCTION: MetricsLibrary::userConfigurationDelete
//////////////////////////////////////////////////////
// Deletes the given user configuration through the Metrics Library api.
// Returns true only when the library reports StatusCode::Success.
bool MetricsLibrary::userConfigurationDelete(const ConfigurationHandle_1_0 &handle) {
    const auto status = api->functions.ConfigurationDelete(handle);
    return status == StatusCode::Success;
}
} // namespace NEO

View File

@@ -7,31 +7,82 @@
#include "performance_counters_win.h"
#include "runtime/device/device.h"
#include "runtime/helpers/hw_helper.h"
#include "runtime/os_interface/windows/os_interface.h"
#include "runtime/os_interface/windows/windows_wrapper.h"
#include "runtime/os_interface/windows/os_time_win.h"
namespace NEO {
// Builds the Windows performance counters object for the given os time
// and hands its ownership to the caller.
std::unique_ptr<PerformanceCounters> PerformanceCounters::create(OSTime *osTime) {
    std::unique_ptr<PerformanceCounters> counters(new PerformanceCountersWin(osTime));
    return counters;
}
PerformanceCountersWin::PerformanceCountersWin(OSTime *osTime) : PerformanceCounters(osTime) {
cbData.hAdapter = (void *)(UINT_PTR)osInterface->get()->getAdapterHandle();
cbData.hDevice = (void *)(UINT_PTR)osInterface->get()->getDeviceHandle();
cbData.pfnEscapeCb = osInterface->get()->getEscapeHandle();
/////////////////////////////////////////////////////
// PerformanceCounters::create
/////////////////////////////////////////////////////
// Creates the Windows performance counters object for the given device.
//
// Fills the Metrics Library client data with the adapter, device and escape
// handles taken from the device's os interface, copies the kmd
// instrumentation capability flag, links the client data to the context
// creation data and sets the client gen reported by the hw helper.
//
// Returns the newly created object; ownership is passed to the caller.
std::unique_ptr<PerformanceCounters> PerformanceCounters::create(Device *device) {
    auto counter = std::make_unique<PerformanceCountersWin>();
    auto osInterface = device->getOSTime()->getOSInterface()->get();
    auto gen = device->getHardwareInfo().platform.eRenderCoreFamily;
    auto &hwHelper = HwHelper::get(gen);
    UNRECOVERABLE_IF(counter == nullptr);

    // Handles are converted through UINT_PTR before being stored as void *.
    // The device handle is assigned once; the previous duplicated assignment was redundant.
    counter->clientData.Windows.Adapter = reinterpret_cast<void *>(static_cast<UINT_PTR>(osInterface->getAdapterHandle()));
    counter->clientData.Windows.Device = reinterpret_cast<void *>(static_cast<UINT_PTR>(osInterface->getDeviceHandle()));
    counter->clientData.Windows.Escape = osInterface->getEscapeHandle();
    counter->clientData.Windows.KmdInstrumentationEnabled = device->getHardwareInfo().capabilityTable.instrumentationEnabled;

    // Context creation data points at the client data owned by the counters object.
    counter->contextData.ClientData = &counter->clientData;
    counter->clientType.Gen = static_cast<MetricsLibraryApi::ClientGen>(hwHelper.getMetricsLibraryGenId());

    return counter;
}
PerformanceCountersWin::~PerformanceCountersWin() {
if (pAutoSamplingInterface) {
autoSamplingStopFunc(&pAutoSamplingInterface);
pAutoSamplingInterface = nullptr;
available = false;
//////////////////////////////////////////////////////
// PerformanceCountersWin::enableCountersConfiguration
//////////////////////////////////////////////////////
bool PerformanceCountersWin::enableCountersConfiguration() {
// Release previous counters configuration so the user
// can change configuration between kernels.
releaseCountersConfiguration();
// Create mmio user configuration.
if (!metricsLibrary->userConfigurationCreate(
context,
userConfiguration)) {
DEBUG_BREAK_IF(true);
return false;
}
// Create oa configuration.
if (!metricsLibrary->oaConfigurationCreate(
context,
oaConfiguration)) {
DEBUG_BREAK_IF(true);
return false;
}
// Enable oa configuration.
if (!metricsLibrary->oaConfigurationActivate(
oaConfiguration)) {
DEBUG_BREAK_IF(true);
return false;
}
return true;
}
//////////////////////////////////////////////////////
// PerformanceCountersWin::releaseCountersConfiguration
//////////////////////////////////////////////////////
void PerformanceCountersWin::releaseCountersConfiguration() {
// Mmio user configuration.
if (userConfiguration.IsValid()) {
metricsLibrary->userConfigurationDelete(userConfiguration);
userConfiguration.data = nullptr;
}
// Oa configuration.
if (oaConfiguration.IsValid()) {
metricsLibrary->oaConfigurationDeactivate(oaConfiguration);
metricsLibrary->oaConfigurationDelete(oaConfiguration);
oaConfiguration.data = nullptr;
}
}
// Windows specific initialization: runs the base class initialization for the
// given hardware info and then calls the setAvailableFunc and verifyEnableFunc
// function pointers.
void PerformanceCountersWin::initialize(const HardwareInfo *hwInfo) {
// Common initialization first.
PerformanceCounters::initialize(hwInfo);
// NOTE(review): presumably marks instrumentation as available - confirm against instrSetAvailable.
setAvailableFunc(true);
// Passes the escape callback data to the verify/enable hook.
verifyEnableFunc(cbData);
}
} // namespace NEO

View File

@@ -7,18 +7,18 @@
#pragma once
#include "runtime/os_interface/performance_counters.h"
#include "runtime/os_interface/windows/os_interface.h"
namespace NEO {
class PerformanceCountersWin : virtual public PerformanceCounters {
public:
PerformanceCountersWin(OSTime *osTime);
~PerformanceCountersWin() override;
void initialize(const HardwareInfo *hwInfo) override;
PerformanceCountersWin() = default;
~PerformanceCountersWin() override = default;
protected:
decltype(&instrSetAvailable) setAvailableFunc = instrSetAvailable;
decltype(&instrEscVerifyEnable) verifyEnableFunc = instrEscVerifyEnable;
/////////////////////////////////////////////////////
// Gpu oa/mmio configuration.
/////////////////////////////////////////////////////
bool enableCountersConfiguration() override;
void releaseCountersConfiguration() override;
};
} // namespace NEO

View File

@@ -51,9 +51,11 @@ class TagAllocator {
public:
using NodeType = TagNode<TagType>;
TagAllocator(MemoryManager *memMngr, size_t tagCount, size_t tagAlignment) : memoryManager(memMngr),
tagCount(tagCount),
tagAlignment(tagAlignment) {
TagAllocator(MemoryManager *memMngr, size_t tagCount, size_t tagAlignment, size_t tagSize = sizeof(TagType)) : memoryManager(memMngr),
tagCount(tagCount),
tagAlignment(tagAlignment) {
this->tagSize = alignUp(tagSize, tagAlignment);
populateFreeTags();
}
@@ -109,6 +111,7 @@ class TagAllocator {
MemoryManager *memoryManager;
size_t tagCount;
size_t tagAlignment;
size_t tagSize;
std::mutex allocatorMutex;
@@ -126,7 +129,6 @@ class TagAllocator {
}
void populateFreeTags() {
size_t tagSize = alignUp(sizeof(TagType), tagAlignment);
size_t allocationSizeRequired = tagCount * tagSize;
auto allocationType = TagType::getAllocationType();