mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 23:56:39 +08:00
Use GPU pointers for HwPerfCounter.
Related-To: NEO-2872
Change-Id: Ia30f2ee0d96a3da05b8e5ecf55e9b7fb5a34ace7
Signed-off-by: Piotr Fusik <piotr.fusik@intel.com>
This commit is contained in:
@@ -362,15 +362,15 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
|||||||
return commandQueueProperties;
|
return commandQueueProperties;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isProfilingEnabled() {
|
bool isProfilingEnabled() const {
|
||||||
return !!(this->getCommandQueueProperties() & CL_QUEUE_PROFILING_ENABLE);
|
return !!(this->getCommandQueueProperties() & CL_QUEUE_PROFILING_ENABLE);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isOOQEnabled() {
|
bool isOOQEnabled() const {
|
||||||
return !!(this->getCommandQueueProperties() & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
|
return !!(this->getCommandQueueProperties() & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isPerfCountersEnabled() {
|
bool isPerfCountersEnabled() const {
|
||||||
return perfCountersEnabled;
|
return perfCountersEnabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -388,7 +388,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
|||||||
this->isSpecialCommandQueue = newValue;
|
this->isSpecialCommandQueue = newValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint16_t getPerfCountersUserRegistersNumber() {
|
uint16_t getPerfCountersUserRegistersNumber() const {
|
||||||
return perfCountersUserRegistersNumber;
|
return perfCountersUserRegistersNumber;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -163,8 +163,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool profilingRequired = (this->isProfilingEnabled() && event != nullptr);
|
bool profilingRequired = (this->isProfilingEnabled() && event != nullptr);
|
||||||
bool perfCountersRequired = false;
|
bool perfCountersRequired = (this->isPerfCountersEnabled() && event != nullptr);
|
||||||
perfCountersRequired = (this->isPerfCountersEnabled() && event != nullptr);
|
|
||||||
KernelOperation *blockedCommandsData = nullptr;
|
KernelOperation *blockedCommandsData = nullptr;
|
||||||
std::unique_ptr<PrintfHandler> printfHandler;
|
std::unique_ptr<PrintfHandler> printfHandler;
|
||||||
bool slmUsed = multiDispatchInfo.usesSlm() || parentKernel;
|
bool slmUsed = multiDispatchInfo.usesSlm() || parentKernel;
|
||||||
@@ -381,7 +380,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
|||||||
KernelOperation *&blockedCommandsData,
|
KernelOperation *&blockedCommandsData,
|
||||||
TimestampPacketContainer &previousTimestampPacketNodes,
|
TimestampPacketContainer &previousTimestampPacketNodes,
|
||||||
PreemptionMode preemption) {
|
PreemptionMode preemption) {
|
||||||
HwPerfCounter *hwPerfCounter = nullptr;
|
TagNode<HwPerfCounter> *hwPerfCounter = nullptr;
|
||||||
DebugManager.dumpKernelArgs(&multiDispatchInfo);
|
DebugManager.dumpKernelArgs(&multiDispatchInfo);
|
||||||
|
|
||||||
printfHandler.reset(PrintfHandler::create(multiDispatchInfo, *device));
|
printfHandler.reset(PrintfHandler::create(multiDispatchInfo, *device));
|
||||||
@@ -399,7 +398,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
|||||||
// Get allocation for timestamps
|
// Get allocation for timestamps
|
||||||
hwTimeStamps = event->getHwTimeStampNode();
|
hwTimeStamps = event->getHwTimeStampNode();
|
||||||
if (this->isPerfCountersEnabled()) {
|
if (this->isPerfCountersEnabled()) {
|
||||||
hwPerfCounter = event->getHwPerfCounterNode()->tagForCpuAccess;
|
hwPerfCounter = event->getHwPerfCounterNode();
|
||||||
// PERF COUNTER: copy current configuration from queue to event
|
// PERF COUNTER: copy current configuration from queue to event
|
||||||
event->copyPerfCounters(this->getPerfCountersConfigData());
|
event->copyPerfCounters(this->getPerfCountersConfigData());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -114,15 +114,6 @@ inline cl_uint computeDimensions(const size_t workItems[3]) {
|
|||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
class GpgpuWalkerHelper {
|
class GpgpuWalkerHelper {
|
||||||
public:
|
public:
|
||||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
|
||||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
|
||||||
|
|
||||||
static void addAluReadModifyWriteRegister(
|
|
||||||
LinearStream *pCommandStream,
|
|
||||||
uint32_t aluRegister,
|
|
||||||
uint32_t operation,
|
|
||||||
uint32_t mask);
|
|
||||||
|
|
||||||
static void applyWADisableLSQCROPERFforOCL(LinearStream *pCommandStream,
|
static void applyWADisableLSQCROPERFforOCL(LinearStream *pCommandStream,
|
||||||
const Kernel &kernel,
|
const Kernel &kernel,
|
||||||
bool disablePerfMode);
|
bool disablePerfMode);
|
||||||
@@ -143,50 +134,21 @@ class GpgpuWalkerHelper {
|
|||||||
|
|
||||||
static void dispatchProfilingCommandsStart(
|
static void dispatchProfilingCommandsStart(
|
||||||
TagNode<HwTimeStamps> &hwTimeStamps,
|
TagNode<HwTimeStamps> &hwTimeStamps,
|
||||||
NEO::LinearStream *commandStream);
|
LinearStream *commandStream);
|
||||||
|
|
||||||
static void dispatchProfilingCommandsEnd(
|
static void dispatchProfilingCommandsEnd(
|
||||||
TagNode<HwTimeStamps> &hwTimeStamps,
|
TagNode<HwTimeStamps> &hwTimeStamps,
|
||||||
NEO::LinearStream *commandStream);
|
LinearStream *commandStream);
|
||||||
|
|
||||||
static void dispatchPerfCountersNoopidRegisterCommands(
|
|
||||||
CommandQueue &commandQueue,
|
|
||||||
NEO::HwPerfCounter &hwPerfCounter,
|
|
||||||
NEO::LinearStream *commandStream,
|
|
||||||
bool start);
|
|
||||||
|
|
||||||
static void dispatchPerfCountersReadFreqRegisterCommands(
|
|
||||||
CommandQueue &commandQueue,
|
|
||||||
NEO::HwPerfCounter &hwPerfCounter,
|
|
||||||
NEO::LinearStream *commandStream,
|
|
||||||
bool start);
|
|
||||||
|
|
||||||
static void dispatchPerfCountersGeneralPurposeCounterCommands(
|
|
||||||
CommandQueue &commandQueue,
|
|
||||||
NEO::HwPerfCounter &hwPerfCounter,
|
|
||||||
NEO::LinearStream *commandStream,
|
|
||||||
bool start);
|
|
||||||
|
|
||||||
static void dispatchPerfCountersUserCounterCommands(
|
|
||||||
CommandQueue &commandQueue,
|
|
||||||
NEO::HwPerfCounter &hwPerfCounter,
|
|
||||||
NEO::LinearStream *commandStream,
|
|
||||||
bool start);
|
|
||||||
|
|
||||||
static void dispatchPerfCountersOABufferStateCommands(
|
|
||||||
CommandQueue &commandQueue,
|
|
||||||
NEO::HwPerfCounter &hwPerfCounter,
|
|
||||||
NEO::LinearStream *commandStream);
|
|
||||||
|
|
||||||
static void dispatchPerfCountersCommandsStart(
|
static void dispatchPerfCountersCommandsStart(
|
||||||
CommandQueue &commandQueue,
|
CommandQueue &commandQueue,
|
||||||
NEO::HwPerfCounter &hwPerfCounter,
|
TagNode<HwPerfCounter> &hwPerfCounter,
|
||||||
NEO::LinearStream *commandStream);
|
LinearStream *commandStream);
|
||||||
|
|
||||||
static void dispatchPerfCountersCommandsEnd(
|
static void dispatchPerfCountersCommandsEnd(
|
||||||
CommandQueue &commandQueue,
|
CommandQueue &commandQueue,
|
||||||
NEO::HwPerfCounter &hwPerfCounter,
|
TagNode<HwPerfCounter> &hwPerfCounter,
|
||||||
NEO::LinearStream *commandStream);
|
LinearStream *commandStream);
|
||||||
|
|
||||||
static void setupTimestampPacket(
|
static void setupTimestampPacket(
|
||||||
LinearStream *cmdStream,
|
LinearStream *cmdStream,
|
||||||
@@ -203,6 +165,36 @@ class GpgpuWalkerHelper {
|
|||||||
IndirectHeap *dsh);
|
IndirectHeap *dsh);
|
||||||
|
|
||||||
static void adjustMiStoreRegMemMode(MI_STORE_REG_MEM<GfxFamily> *storeCmd);
|
static void adjustMiStoreRegMemMode(MI_STORE_REG_MEM<GfxFamily> *storeCmd);
|
||||||
|
|
||||||
|
private:
|
||||||
|
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||||
|
|
||||||
|
static void addAluReadModifyWriteRegister(
|
||||||
|
LinearStream *pCommandStream,
|
||||||
|
uint32_t aluRegister,
|
||||||
|
uint32_t operation,
|
||||||
|
uint32_t mask);
|
||||||
|
|
||||||
|
static void dispatchStoreRegisterCommand(
|
||||||
|
LinearStream *commandStream,
|
||||||
|
uint64_t memoryAddress,
|
||||||
|
uint32_t registerAddress);
|
||||||
|
|
||||||
|
static void dispatchPerfCountersGeneralPurposeCounterCommands(
|
||||||
|
CommandQueue &commandQueue,
|
||||||
|
TagNode<HwPerfCounter> &hwPerfCounter,
|
||||||
|
LinearStream *commandStream,
|
||||||
|
bool start);
|
||||||
|
|
||||||
|
static void dispatchPerfCountersUserCounterCommands(
|
||||||
|
CommandQueue &commandQueue,
|
||||||
|
TagNode<HwPerfCounter> &hwPerfCounter,
|
||||||
|
LinearStream *commandStream,
|
||||||
|
bool start);
|
||||||
|
|
||||||
|
static void dispatchPerfCountersOABufferStateCommands(
|
||||||
|
TagNode<HwPerfCounter> &hwPerfCounter,
|
||||||
|
LinearStream *commandStream);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
|
|||||||
@@ -42,14 +42,14 @@ void GpgpuWalkerHelper<GfxFamily>::addAluReadModifyWriteRegister(
|
|||||||
typedef typename GfxFamily::MI_LOAD_REGISTER_REG MI_LOAD_REGISTER_REG;
|
typedef typename GfxFamily::MI_LOAD_REGISTER_REG MI_LOAD_REGISTER_REG;
|
||||||
typedef typename GfxFamily::MI_MATH MI_MATH;
|
typedef typename GfxFamily::MI_MATH MI_MATH;
|
||||||
typedef typename GfxFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE;
|
typedef typename GfxFamily::MI_MATH_ALU_INST_INLINE MI_MATH_ALU_INST_INLINE;
|
||||||
auto pCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_REG)));
|
auto pCmd = pCommandStream->getSpaceForCmd<MI_LOAD_REGISTER_REG>();
|
||||||
*pCmd = GfxFamily::cmdInitLoadRegisterReg;
|
*pCmd = GfxFamily::cmdInitLoadRegisterReg;
|
||||||
pCmd->setSourceRegisterAddress(aluRegister);
|
pCmd->setSourceRegisterAddress(aluRegister);
|
||||||
pCmd->setDestinationRegisterAddress(CS_GPR_R0);
|
pCmd->setDestinationRegisterAddress(CS_GPR_R0);
|
||||||
|
|
||||||
// Load "Mask" into CS_GPR_R1
|
// Load "Mask" into CS_GPR_R1
|
||||||
typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
|
typedef typename GfxFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM;
|
||||||
auto pCmd2 = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM)));
|
auto pCmd2 = pCommandStream->getSpaceForCmd<MI_LOAD_REGISTER_IMM>();
|
||||||
*pCmd2 = GfxFamily::cmdInitLoadRegisterImm;
|
*pCmd2 = GfxFamily::cmdInitLoadRegisterImm;
|
||||||
pCmd2->setRegisterOffset(CS_GPR_R1);
|
pCmd2->setRegisterOffset(CS_GPR_R1);
|
||||||
pCmd2->setDataDword(mask);
|
pCmd2->setDataDword(mask);
|
||||||
@@ -88,13 +88,13 @@ void GpgpuWalkerHelper<GfxFamily>::addAluReadModifyWriteRegister(
|
|||||||
pAluParam->DW0.BitField.Operand2 = ALU_REGISTER_R_ACCU;
|
pAluParam->DW0.BitField.Operand2 = ALU_REGISTER_R_ACCU;
|
||||||
|
|
||||||
// LOAD value of CS_GPR_R0 into "Register"
|
// LOAD value of CS_GPR_R0 into "Register"
|
||||||
auto pCmd4 = reinterpret_cast<MI_LOAD_REGISTER_REG *>(pCommandStream->getSpace(sizeof(MI_LOAD_REGISTER_REG)));
|
auto pCmd4 = pCommandStream->getSpaceForCmd<MI_LOAD_REGISTER_REG>();
|
||||||
*pCmd4 = GfxFamily::cmdInitLoadRegisterReg;
|
*pCmd4 = GfxFamily::cmdInitLoadRegisterReg;
|
||||||
pCmd4->setSourceRegisterAddress(CS_GPR_R0);
|
pCmd4->setSourceRegisterAddress(CS_GPR_R0);
|
||||||
pCmd4->setDestinationRegisterAddress(aluRegister);
|
pCmd4->setDestinationRegisterAddress(aluRegister);
|
||||||
|
|
||||||
// Add PIPE_CONTROL to flush caches
|
// Add PIPE_CONTROL to flush caches
|
||||||
auto pCmd5 = reinterpret_cast<PIPE_CONTROL *>(pCommandStream->getSpace(sizeof(PIPE_CONTROL)));
|
auto pCmd5 = pCommandStream->getSpaceForCmd<PIPE_CONTROL>();
|
||||||
*pCmd5 = GfxFamily::cmdInitPipeControl;
|
*pCmd5 = GfxFamily::cmdInitPipeControl;
|
||||||
pCmd5->setCommandStreamerStallEnable(true);
|
pCmd5->setCommandStreamerStallEnable(true);
|
||||||
pCmd5->setDcFlushEnable(true);
|
pCmd5->setDcFlushEnable(true);
|
||||||
@@ -106,7 +106,8 @@ void GpgpuWalkerHelper<GfxFamily>::addAluReadModifyWriteRegister(
|
|||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
|
void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
|
||||||
TagNode<HwTimeStamps> &hwTimeStamps,
|
TagNode<HwTimeStamps> &hwTimeStamps,
|
||||||
NEO::LinearStream *commandStream) {
|
LinearStream *commandStream) {
|
||||||
|
|
||||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||||
|
|
||||||
// PIPE_CONTROL for global timestamp
|
// PIPE_CONTROL for global timestamp
|
||||||
@@ -118,7 +119,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
|
|||||||
timeStampAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, ContextStartTS);
|
timeStampAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, ContextStartTS);
|
||||||
|
|
||||||
//low part
|
//low part
|
||||||
auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
auto pMICmdLow = commandStream->getSpaceForCmd<MI_STORE_REGISTER_MEM>();
|
||||||
*pMICmdLow = GfxFamily::cmdInitStoreRegisterMem;
|
*pMICmdLow = GfxFamily::cmdInitStoreRegisterMem;
|
||||||
adjustMiStoreRegMemMode(pMICmdLow);
|
adjustMiStoreRegMemMode(pMICmdLow);
|
||||||
pMICmdLow->setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
pMICmdLow->setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||||
@@ -128,12 +129,12 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
|
|||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
|
void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
|
||||||
TagNode<HwTimeStamps> &hwTimeStamps,
|
TagNode<HwTimeStamps> &hwTimeStamps,
|
||||||
NEO::LinearStream *commandStream) {
|
LinearStream *commandStream) {
|
||||||
|
|
||||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||||
|
|
||||||
// PIPE_CONTROL for global timestamp
|
// PIPE_CONTROL for global timestamp
|
||||||
auto pPipeControlCmd = (PIPE_CONTROL *)commandStream->getSpace(sizeof(PIPE_CONTROL));
|
auto pPipeControlCmd = commandStream->getSpaceForCmd<PIPE_CONTROL>();
|
||||||
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
|
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
|
||||||
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||||
|
|
||||||
@@ -141,7 +142,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
|
|||||||
uint64_t timeStampAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, ContextEndTS);
|
uint64_t timeStampAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, ContextEndTS);
|
||||||
|
|
||||||
//low part
|
//low part
|
||||||
auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
auto pMICmdLow = commandStream->getSpaceForCmd<MI_STORE_REGISTER_MEM>();
|
||||||
*pMICmdLow = GfxFamily::cmdInitStoreRegisterMem;
|
*pMICmdLow = GfxFamily::cmdInitStoreRegisterMem;
|
||||||
adjustMiStoreRegMemMode(pMICmdLow);
|
adjustMiStoreRegMemMode(pMICmdLow);
|
||||||
pMICmdLow->setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
pMICmdLow->setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||||
@@ -149,144 +150,79 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersNoopidRegisterCommands(
|
void GpgpuWalkerHelper<GfxFamily>::dispatchStoreRegisterCommand(
|
||||||
CommandQueue &commandQueue,
|
LinearStream *commandStream,
|
||||||
NEO::HwPerfCounter &hwPerfCounter,
|
uint64_t memoryAddress,
|
||||||
NEO::LinearStream *commandStream,
|
uint32_t registerAddress) {
|
||||||
bool start) {
|
|
||||||
|
|
||||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||||
|
|
||||||
uint64_t address = start ? reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.DMAFenceIdBegin))
|
auto pCmd = commandStream->getSpaceForCmd<MI_STORE_REGISTER_MEM>();
|
||||||
: reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.DMAFenceIdEnd));
|
*pCmd = GfxFamily::cmdInitStoreRegisterMem;
|
||||||
|
pCmd->setRegisterAddress(registerAddress);
|
||||||
auto pNoopIdRegister = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
pCmd->setMemoryAddress(memoryAddress);
|
||||||
*pNoopIdRegister = GfxFamily::cmdInitStoreRegisterMem;
|
|
||||||
pNoopIdRegister->setRegisterAddress(NEO::INSTR_MMIO_NOOPID);
|
|
||||||
pNoopIdRegister->setMemoryAddress(address);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename GfxFamily>
|
|
||||||
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersReadFreqRegisterCommands(
|
|
||||||
CommandQueue &commandQueue,
|
|
||||||
NEO::HwPerfCounter &hwPerfCounter,
|
|
||||||
NEO::LinearStream *commandStream,
|
|
||||||
bool start) {
|
|
||||||
|
|
||||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
|
||||||
|
|
||||||
uint64_t address = start ? reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.CoreFreqBegin))
|
|
||||||
: reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.CoreFreqEnd));
|
|
||||||
|
|
||||||
auto pCoreFreqRegister = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
|
||||||
*pCoreFreqRegister = GfxFamily::cmdInitStoreRegisterMem;
|
|
||||||
pCoreFreqRegister->setRegisterAddress(NEO::INSTR_MMIO_RPSTAT1);
|
|
||||||
pCoreFreqRegister->setMemoryAddress(address);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersGeneralPurposeCounterCommands(
|
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersGeneralPurposeCounterCommands(
|
||||||
CommandQueue &commandQueue,
|
CommandQueue &commandQueue,
|
||||||
NEO::HwPerfCounter &hwPerfCounter,
|
TagNode<HwPerfCounter> &hwPerfCounter,
|
||||||
NEO::LinearStream *commandStream,
|
LinearStream *commandStream,
|
||||||
bool start) {
|
bool start) {
|
||||||
|
|
||||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
uint64_t baseAddress = hwPerfCounter.getGpuAddress();
|
||||||
uint64_t address = 0;
|
baseAddress += start ? offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportBegin.Gp)
|
||||||
const uint64_t baseAddress = start ? reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.HwPerfReportBegin.Gp))
|
: offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.Gp);
|
||||||
: reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.HwPerfReportEnd.Gp));
|
|
||||||
|
|
||||||
// Read General Purpose counters
|
// Read General Purpose counters
|
||||||
for (uint16_t i = 0; i < NEO::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT; i++) {
|
for (auto i = 0u; i < NEO::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT; i++) {
|
||||||
auto pGeneralPurposeRegister = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
|
||||||
*pGeneralPurposeRegister = GfxFamily::cmdInitStoreRegisterMem;
|
|
||||||
uint32_t regAddr = INSTR_GFX_OFFSETS::INSTR_PERF_CNT_1_DW0 + i * sizeof(cl_uint);
|
uint32_t regAddr = INSTR_GFX_OFFSETS::INSTR_PERF_CNT_1_DW0 + i * sizeof(cl_uint);
|
||||||
pGeneralPurposeRegister->setRegisterAddress(regAddr);
|
|
||||||
//Gp field is 2*uint64 wide so it can hold 4 uint32
|
//Gp field is 2*uint64 wide so it can hold 4 uint32
|
||||||
address = baseAddress + i * sizeof(cl_uint);
|
uint64_t address = baseAddress + i * sizeof(cl_uint);
|
||||||
pGeneralPurposeRegister->setMemoryAddress(address);
|
dispatchStoreRegisterCommand(commandStream, address, regAddr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersUserCounterCommands(
|
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersUserCounterCommands(
|
||||||
CommandQueue &commandQueue,
|
CommandQueue &commandQueue,
|
||||||
NEO::HwPerfCounter &hwPerfCounter,
|
TagNode<HwPerfCounter> &hwPerfCounter,
|
||||||
NEO::LinearStream *commandStream,
|
LinearStream *commandStream,
|
||||||
bool start) {
|
bool start) {
|
||||||
|
|
||||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
uint64_t baseAddr = hwPerfCounter.getGpuAddress();
|
||||||
|
baseAddr += start ? offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportBegin.User)
|
||||||
uint64_t address = 0;
|
: offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.User);
|
||||||
const uint64_t baseAddr = start ? reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.HwPerfReportBegin.User))
|
auto userRegs = &commandQueue.getPerfCountersConfigData()->ReadRegs;
|
||||||
: reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.HwPerfReportEnd.User));
|
|
||||||
uint32_t cmdNum = 0;
|
|
||||||
uint32_t regAddr = 0;
|
|
||||||
auto configData = commandQueue.getPerfCountersConfigData();
|
|
||||||
auto userRegs = &configData->ReadRegs;
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < userRegs->RegsCount; i++) {
|
for (uint32_t i = 0; i < userRegs->RegsCount; i++) {
|
||||||
auto pRegister = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
uint32_t regAddr = userRegs->Reg[i].Offset;
|
||||||
*pRegister = GfxFamily::cmdInitStoreRegisterMem;
|
|
||||||
|
|
||||||
regAddr = userRegs->Reg[i].Offset;
|
|
||||||
pRegister->setRegisterAddress(regAddr);
|
|
||||||
//offset between base (low) registers is cl_ulong wide
|
//offset between base (low) registers is cl_ulong wide
|
||||||
address = baseAddr + i * sizeof(cl_ulong);
|
uint64_t address = baseAddr + i * sizeof(cl_ulong);
|
||||||
pRegister->setMemoryAddress(address);
|
dispatchStoreRegisterCommand(commandStream, address, regAddr);
|
||||||
cmdNum++;
|
|
||||||
|
|
||||||
if (userRegs->Reg[i].BitSize > 32) {
|
if (userRegs->Reg[i].BitSize > 32) {
|
||||||
pRegister = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
dispatchStoreRegisterCommand(commandStream, address + sizeof(cl_uint), regAddr + sizeof(cl_uint));
|
||||||
*pRegister = GfxFamily::cmdInitStoreRegisterMem;
|
|
||||||
|
|
||||||
regAddr += sizeof(cl_uint);
|
|
||||||
pRegister->setRegisterAddress(regAddr);
|
|
||||||
address += sizeof(cl_uint);
|
|
||||||
pRegister->setMemoryAddress(address);
|
|
||||||
cmdNum++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersOABufferStateCommands(
|
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersOABufferStateCommands(
|
||||||
CommandQueue &commandQueue,
|
TagNode<HwPerfCounter> &hwPerfCounter,
|
||||||
NEO::HwPerfCounter &hwPerfCounter,
|
LinearStream *commandStream) {
|
||||||
NEO::LinearStream *commandStream) {
|
|
||||||
|
|
||||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.OaStatus), INSTR_GFX_OFFSETS::INSTR_OA_STATUS);
|
||||||
|
dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.OaHead), INSTR_GFX_OFFSETS::INSTR_OA_HEAD_PTR);
|
||||||
uint64_t address = 0;
|
dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.OaTail), INSTR_GFX_OFFSETS::INSTR_OA_TAIL_PTR);
|
||||||
//OA Status
|
|
||||||
auto pOaRegister = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
|
||||||
*pOaRegister = GfxFamily::cmdInitStoreRegisterMem;
|
|
||||||
pOaRegister->setRegisterAddress(INSTR_GFX_OFFSETS::INSTR_OA_STATUS);
|
|
||||||
address = reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.OaStatus));
|
|
||||||
pOaRegister->setMemoryAddress(address);
|
|
||||||
|
|
||||||
//OA Head
|
|
||||||
pOaRegister = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
|
||||||
*pOaRegister = GfxFamily::cmdInitStoreRegisterMem;
|
|
||||||
pOaRegister->setRegisterAddress(INSTR_GFX_OFFSETS::INSTR_OA_HEAD_PTR);
|
|
||||||
address = reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.OaHead));
|
|
||||||
pOaRegister->setMemoryAddress(address);
|
|
||||||
|
|
||||||
//OA Tail
|
|
||||||
pOaRegister = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
|
||||||
*pOaRegister = GfxFamily::cmdInitStoreRegisterMem;
|
|
||||||
pOaRegister->setRegisterAddress(INSTR_GFX_OFFSETS::INSTR_OA_TAIL_PTR);
|
|
||||||
address = reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.OaTail));
|
|
||||||
pOaRegister->setMemoryAddress(address);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersCommandsStart(
|
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersCommandsStart(
|
||||||
CommandQueue &commandQueue,
|
CommandQueue &commandQueue,
|
||||||
NEO::HwPerfCounter &hwPerfCounter,
|
TagNode<HwPerfCounter> &hwPerfCounter,
|
||||||
NEO::LinearStream *commandStream) {
|
LinearStream *commandStream) {
|
||||||
|
|
||||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
|
||||||
using MI_REPORT_PERF_COUNT = typename GfxFamily::MI_REPORT_PERF_COUNT;
|
using MI_REPORT_PERF_COUNT = typename GfxFamily::MI_REPORT_PERF_COUNT;
|
||||||
|
|
||||||
auto perfCounters = commandQueue.getPerfCounters();
|
auto perfCounters = commandQueue.getPerfCounters();
|
||||||
@@ -294,25 +230,25 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersCommandsStart(
|
|||||||
uint32_t currentReportId = perfCounters->getCurrentReportId();
|
uint32_t currentReportId = perfCounters->getCurrentReportId();
|
||||||
uint64_t address = 0;
|
uint64_t address = 0;
|
||||||
//flush command streamer
|
//flush command streamer
|
||||||
auto pPipeControlCmd = (PIPE_CONTROL *)commandStream->getSpace(sizeof(PIPE_CONTROL));
|
auto pPipeControlCmd = commandStream->getSpaceForCmd<PIPE_CONTROL>();
|
||||||
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
|
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
|
||||||
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||||
|
|
||||||
//Store value of NOOPID register
|
//Store value of NOOPID register
|
||||||
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersNoopidRegisterCommands(commandQueue, hwPerfCounter, commandStream, true);
|
GpgpuWalkerHelper<GfxFamily>::dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.DMAFenceIdBegin), INSTR_MMIO_NOOPID);
|
||||||
|
|
||||||
//Read Core Frequency
|
//Read Core Frequency
|
||||||
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersReadFreqRegisterCommands(commandQueue, hwPerfCounter, commandStream, true);
|
GpgpuWalkerHelper<GfxFamily>::dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.CoreFreqBegin), INSTR_MMIO_RPSTAT1);
|
||||||
|
|
||||||
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersGeneralPurposeCounterCommands(commandQueue, hwPerfCounter, commandStream, true);
|
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersGeneralPurposeCounterCommands(commandQueue, hwPerfCounter, commandStream, true);
|
||||||
|
|
||||||
auto pReportPerfCount = (MI_REPORT_PERF_COUNT *)commandStream->getSpace(sizeof(MI_REPORT_PERF_COUNT));
|
auto pReportPerfCount = commandStream->getSpaceForCmd<MI_REPORT_PERF_COUNT>();
|
||||||
*pReportPerfCount = GfxFamily::cmdInitReportPerfCount;
|
*pReportPerfCount = GfxFamily::cmdInitReportPerfCount;
|
||||||
pReportPerfCount->setReportId(currentReportId);
|
pReportPerfCount->setReportId(currentReportId);
|
||||||
address = reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.HwPerfReportBegin.Oa));
|
address = hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportBegin.Oa);
|
||||||
pReportPerfCount->setMemoryAddress(address);
|
pReportPerfCount->setMemoryAddress(address);
|
||||||
|
|
||||||
address = reinterpret_cast<uint64_t>(&(hwPerfCounter.HWTimeStamp.GlobalStartTS));
|
address = hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWTimeStamp.GlobalStartTS);
|
||||||
|
|
||||||
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, 0llu, false);
|
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, 0llu, false);
|
||||||
|
|
||||||
@@ -324,41 +260,39 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersCommandsStart(
|
|||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersCommandsEnd(
|
void GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersCommandsEnd(
|
||||||
CommandQueue &commandQueue,
|
CommandQueue &commandQueue,
|
||||||
NEO::HwPerfCounter &hwPerfCounter,
|
TagNode<HwPerfCounter> &hwPerfCounter,
|
||||||
NEO::LinearStream *commandStream) {
|
LinearStream *commandStream) {
|
||||||
|
|
||||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
|
||||||
using MI_REPORT_PERF_COUNT = typename GfxFamily::MI_REPORT_PERF_COUNT;
|
using MI_REPORT_PERF_COUNT = typename GfxFamily::MI_REPORT_PERF_COUNT;
|
||||||
|
|
||||||
auto perfCounters = commandQueue.getPerfCounters();
|
auto perfCounters = commandQueue.getPerfCounters();
|
||||||
|
|
||||||
uint32_t currentReportId = perfCounters->getCurrentReportId();
|
uint32_t currentReportId = perfCounters->getCurrentReportId();
|
||||||
uint64_t address = 0;
|
|
||||||
|
|
||||||
//flush command streamer
|
//flush command streamer
|
||||||
auto pPipeControlCmd = (PIPE_CONTROL *)commandStream->getSpace(sizeof(PIPE_CONTROL));
|
auto pPipeControlCmd = commandStream->getSpaceForCmd<PIPE_CONTROL>();
|
||||||
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
|
*pPipeControlCmd = GfxFamily::cmdInitPipeControl;
|
||||||
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||||
|
|
||||||
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersOABufferStateCommands(commandQueue, hwPerfCounter, commandStream);
|
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersOABufferStateCommands(hwPerfCounter, commandStream);
|
||||||
|
|
||||||
//Timestamp: Global End
|
//Timestamp: Global End
|
||||||
address = reinterpret_cast<uint64_t>(&(hwPerfCounter.HWTimeStamp.GlobalEndTS));
|
uint64_t address = hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWTimeStamp.GlobalEndTS);
|
||||||
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, 0llu, false);
|
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, 0llu, false);
|
||||||
|
|
||||||
auto pReportPerfCount = (MI_REPORT_PERF_COUNT *)commandStream->getSpace(sizeof(MI_REPORT_PERF_COUNT));
|
auto pReportPerfCount = commandStream->getSpaceForCmd<MI_REPORT_PERF_COUNT>();
|
||||||
*pReportPerfCount = GfxFamily::cmdInitReportPerfCount;
|
*pReportPerfCount = GfxFamily::cmdInitReportPerfCount;
|
||||||
pReportPerfCount->setReportId(currentReportId);
|
pReportPerfCount->setReportId(currentReportId);
|
||||||
address = reinterpret_cast<uint64_t>(&(hwPerfCounter.HWPerfCounters.HwPerfReportEnd.Oa));
|
address = hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.Oa);
|
||||||
pReportPerfCount->setMemoryAddress(address);
|
pReportPerfCount->setMemoryAddress(address);
|
||||||
|
|
||||||
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersGeneralPurposeCounterCommands(commandQueue, hwPerfCounter, commandStream, false);
|
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersGeneralPurposeCounterCommands(commandQueue, hwPerfCounter, commandStream, false);
|
||||||
|
|
||||||
//Store value of NOOPID register
|
//Store value of NOOPID register
|
||||||
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersNoopidRegisterCommands(commandQueue, hwPerfCounter, commandStream, false);
|
GpgpuWalkerHelper<GfxFamily>::dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.DMAFenceIdEnd), INSTR_MMIO_NOOPID);
|
||||||
|
|
||||||
//Read Core Frequency
|
//Read Core Frequency
|
||||||
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersReadFreqRegisterCommands(commandQueue, hwPerfCounter, commandStream, false);
|
GpgpuWalkerHelper<GfxFamily>::dispatchStoreRegisterCommand(commandStream, hwPerfCounter.getGpuAddress() + offsetof(HwPerfCounter, HWPerfCounters.CoreFreqEnd), INSTR_MMIO_RPSTAT1);
|
||||||
|
|
||||||
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersUserCounterCommands(commandQueue, hwPerfCounter, commandStream, false);
|
GpgpuWalkerHelper<GfxFamily>::dispatchPerfCountersUserCounterCommands(commandQueue, hwPerfCounter, commandStream, false);
|
||||||
|
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ class HardwareInterface {
|
|||||||
const CsrDependencies &csrDependencies,
|
const CsrDependencies &csrDependencies,
|
||||||
KernelOperation **blockedCommandsData,
|
KernelOperation **blockedCommandsData,
|
||||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
TagNode<HwTimeStamps> *hwTimeStamps,
|
||||||
HwPerfCounter *hwPerfCounter,
|
TagNode<HwPerfCounter> *hwPerfCounter,
|
||||||
TimestampPacketContainer *previousTimestampPacketNodes,
|
TimestampPacketContainer *previousTimestampPacketNodes,
|
||||||
TimestampPacketContainer *currentTimestampPacketNodes,
|
TimestampPacketContainer *currentTimestampPacketNodes,
|
||||||
PreemptionMode preemptionMode,
|
PreemptionMode preemptionMode,
|
||||||
@@ -67,13 +67,13 @@ class HardwareInterface {
|
|||||||
const DispatchInfo &dispatchInfo,
|
const DispatchInfo &dispatchInfo,
|
||||||
const MultiDispatchInfo &multiDispatchInfo,
|
const MultiDispatchInfo &multiDispatchInfo,
|
||||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
TagNode<HwTimeStamps> *hwTimeStamps,
|
||||||
HwPerfCounter *hwPerfCounter,
|
TagNode<HwPerfCounter> *hwPerfCounter,
|
||||||
LinearStream *commandStream,
|
LinearStream *commandStream,
|
||||||
CommandQueue &commandQueue);
|
CommandQueue &commandQueue);
|
||||||
|
|
||||||
static void dispatchProfilingPerfEndCommands(
|
static void dispatchProfilingPerfEndCommands(
|
||||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
TagNode<HwTimeStamps> *hwTimeStamps,
|
||||||
HwPerfCounter *hwPerfCounter,
|
TagNode<HwPerfCounter> *hwPerfCounter,
|
||||||
LinearStream *commandStream,
|
LinearStream *commandStream,
|
||||||
CommandQueue &commandQueue);
|
CommandQueue &commandQueue);
|
||||||
|
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
|||||||
const CsrDependencies &csrDependencies,
|
const CsrDependencies &csrDependencies,
|
||||||
KernelOperation **blockedCommandsData,
|
KernelOperation **blockedCommandsData,
|
||||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
TagNode<HwTimeStamps> *hwTimeStamps,
|
||||||
HwPerfCounter *hwPerfCounter,
|
TagNode<HwPerfCounter> *hwPerfCounter,
|
||||||
TimestampPacketContainer *previousTimestampPacketNodes,
|
TimestampPacketContainer *previousTimestampPacketNodes,
|
||||||
TimestampPacketContainer *currentTimestampPacketNodes,
|
TimestampPacketContainer *currentTimestampPacketNodes,
|
||||||
PreemptionMode preemptionMode,
|
PreemptionMode preemptionMode,
|
||||||
|
|||||||
@@ -53,7 +53,7 @@ inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfStartCommands(
|
|||||||
const DispatchInfo &dispatchInfo,
|
const DispatchInfo &dispatchInfo,
|
||||||
const MultiDispatchInfo &multiDispatchInfo,
|
const MultiDispatchInfo &multiDispatchInfo,
|
||||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
TagNode<HwTimeStamps> *hwTimeStamps,
|
||||||
HwPerfCounter *hwPerfCounter,
|
TagNode<HwPerfCounter> *hwPerfCounter,
|
||||||
LinearStream *commandStream,
|
LinearStream *commandStream,
|
||||||
CommandQueue &commandQueue) {
|
CommandQueue &commandQueue) {
|
||||||
|
|
||||||
@@ -71,7 +71,7 @@ inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfStartCommands(
|
|||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfEndCommands(
|
inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfEndCommands(
|
||||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
TagNode<HwTimeStamps> *hwTimeStamps,
|
||||||
HwPerfCounter *hwPerfCounter,
|
TagNode<HwPerfCounter> *hwPerfCounter,
|
||||||
LinearStream *commandStream,
|
LinearStream *commandStream,
|
||||||
CommandQueue &commandQueue) {
|
CommandQueue &commandQueue) {
|
||||||
|
|
||||||
|
|||||||
@@ -213,7 +213,6 @@ void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKer
|
|||||||
igilQueue->m_controls.m_CleanupSectionAddress = ptrOffset(slbBuffer->getGpuAddress(), slbCS.getUsed());
|
igilQueue->m_controls.m_CleanupSectionAddress = ptrOffset(slbBuffer->getGpuAddress(), slbCS.getUsed());
|
||||||
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(&slbCS, *parentKernel, true);
|
GpgpuWalkerHelper<GfxFamily>::applyWADisableLSQCROPERFforOCL(&slbCS, *parentKernel, true);
|
||||||
|
|
||||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
|
||||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||||
|
|
||||||
if (hwTimeStamp != nullptr) {
|
if (hwTimeStamp != nullptr) {
|
||||||
|
|||||||
@@ -1194,13 +1194,8 @@ TEST_F(EventTest, hwPerfCounterMemoryIsPlacedInGraphicsAllocation) {
|
|||||||
void *memoryStorage = allocation->getUnderlyingBuffer();
|
void *memoryStorage = allocation->getUnderlyingBuffer();
|
||||||
size_t graphicsAllocationSize = allocation->getUnderlyingBufferSize();
|
size_t graphicsAllocationSize = allocation->getUnderlyingBufferSize();
|
||||||
|
|
||||||
uintptr_t perfCounterAddress = reinterpret_cast<uintptr_t>(perfCounter);
|
EXPECT_GE(perfCounter, memoryStorage);
|
||||||
uintptr_t graphicsAllocationStart = reinterpret_cast<uintptr_t>(memoryStorage);
|
EXPECT_LE(perfCounter + 1, ptrOffset(memoryStorage, graphicsAllocationSize));
|
||||||
|
|
||||||
if (!((perfCounterAddress >= graphicsAllocationStart) &&
|
|
||||||
((perfCounterAddress + sizeof(HwPerfCounter)) <= (graphicsAllocationStart + graphicsAllocationSize)))) {
|
|
||||||
EXPECT_TRUE(false);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(EventTest, IsPerfCounter_DisabledByNullQueue) {
|
TEST_F(EventTest, IsPerfCounter_DisabledByNullQueue) {
|
||||||
|
|||||||
@@ -51,6 +51,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
|||||||
using BaseClass::CommandStreamReceiver::latestSentStatelessMocsConfig;
|
using BaseClass::CommandStreamReceiver::latestSentStatelessMocsConfig;
|
||||||
using BaseClass::CommandStreamReceiver::latestSentTaskCount;
|
using BaseClass::CommandStreamReceiver::latestSentTaskCount;
|
||||||
using BaseClass::CommandStreamReceiver::mediaVfeStateDirty;
|
using BaseClass::CommandStreamReceiver::mediaVfeStateDirty;
|
||||||
|
using BaseClass::CommandStreamReceiver::perfCounterAllocator;
|
||||||
|
using BaseClass::CommandStreamReceiver::profilingTimeStampAllocator;
|
||||||
using BaseClass::CommandStreamReceiver::requiredScratchSize;
|
using BaseClass::CommandStreamReceiver::requiredScratchSize;
|
||||||
using BaseClass::CommandStreamReceiver::requiredThreadArbitrationPolicy;
|
using BaseClass::CommandStreamReceiver::requiredThreadArbitrationPolicy;
|
||||||
using BaseClass::CommandStreamReceiver::samplerCacheFlushRequired;
|
using BaseClass::CommandStreamReceiver::samplerCacheFlushRequired;
|
||||||
|
|||||||
@@ -422,7 +422,9 @@ class MyOSTime : public OSTime {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
int MyOSTime::instanceNum = 0;
|
int MyOSTime::instanceNum = 0;
|
||||||
|
|
||||||
TEST(EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEndTimestampInCompleteTimestampAndDoesntCallOsTimeMethods) {
|
TEST(EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEndTimestampInCompleteTimestampAndDoesntCallOsTimeMethods) {
|
||||||
std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||||
MyOSTime::instanceNum = 0;
|
MyOSTime::instanceNum = 0;
|
||||||
@@ -523,7 +525,21 @@ struct ProfilingWithPerfCountersTests : public ProfilingTests,
|
|||||||
ProfilingTests::TearDown();
|
ProfilingTests::TearDown();
|
||||||
PerformanceCountersFixture::TearDown();
|
PerformanceCountersFixture::TearDown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
GenCmdList::iterator expectStoreRegister(GenCmdList::iterator itor, uint64_t memoryAddress, uint32_t registerAddress) {
|
||||||
|
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||||
|
|
||||||
|
itor = find<MI_STORE_REGISTER_MEM *>(itor, cmdList.end());
|
||||||
|
EXPECT_NE(cmdList.end(), itor);
|
||||||
|
auto pStore = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||||
|
EXPECT_EQ(memoryAddress, pStore->getMemoryAddress());
|
||||||
|
EXPECT_EQ(registerAddress, pStore->getRegisterAddress());
|
||||||
|
itor++;
|
||||||
|
return itor;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GIVENCommandQueueWithProfilingPerfCounterAndForWorkloadWithKernelWHENGetCSFromCmdQueueTHENEnoughSpaceInCS) {
|
HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GIVENCommandQueueWithProfilingPerfCounterAndForWorkloadWithKernelWHENGetCSFromCmdQueueTHENEnoughSpaceInCS) {
|
||||||
typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM;
|
typedef typename FamilyType::MI_STORE_REGISTER_MEM MI_STORE_REGISTER_MEM;
|
||||||
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
|
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
|
||||||
@@ -859,6 +875,88 @@ HWTEST_F(ProfilingWithPerfCountersTests,
|
|||||||
pCmdQ->setPerfCountersEnabled(false, UINT32_MAX);
|
pCmdQ->setPerfCountersEnabled(false, UINT32_MAX);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename TagType>
|
||||||
|
struct FixedGpuAddressTagAllocator : TagAllocator<TagType> {
|
||||||
|
|
||||||
|
struct MockTagNode : TagNode<TagType> {
|
||||||
|
void setGpuAddress(uint64_t value) { this->gpuAddress = value; }
|
||||||
|
};
|
||||||
|
|
||||||
|
FixedGpuAddressTagAllocator(CommandStreamReceiver &csr, uint64_t gpuAddress)
|
||||||
|
: TagAllocator<TagType>(csr.getMemoryManager(), csr.getPreferredTagPoolSize(), MemoryConstants::cacheLineSize) {
|
||||||
|
auto tag = reinterpret_cast<MockTagNode *>(this->freeTags.peekHead());
|
||||||
|
tag->setGpuAddress(gpuAddress);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Enqueues a kernel on a queue with perf counters enabled, using tag
// allocators that report fixed GPU addresses, and verifies that the command
// stream contains MI_STORE_REGISTER_MEM commands writing to those GPU
// addresses (plus the expected field offsets) in the expected order, both
// before and after the walker.
HWTEST_F(ProfilingWithPerfCountersTests, GIVENCommandQueueWithProfilingPerfCountersWHENWalkerIsDispatchedTHENRegisterStoresArePresentInCS) {
    uint64_t timeStampGpuAddress = 0x123456000;
    uint64_t perfCountersGpuAddress = 0xabcdef000;

    // Replace the CSR's tag allocators so the nodes attached to the event
    // report the fixed GPU addresses above.
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.profilingTimeStampAllocator.reset(new FixedGpuAddressTagAllocator<HwTimeStamps>(csr, timeStampGpuAddress));
    csr.perfCounterAllocator.reset(new FixedGpuAddressTagAllocator<HwPerfCounter>(csr, perfCountersGpuAddress));

    pCmdQ->setPerfCountersEnabled(true, 1);

    MockKernel kernel(program.get(), kernelInfo, *pDevice);
    ASSERT_EQ(CL_SUCCESS, kernel.initialize());

    size_t globalOffsets[3] = {0, 0, 0};
    size_t workItems[3] = {1, 1, 1};
    uint32_t dimensions = 1;
    // Initialized so a failed enqueue is detected below instead of leaving
    // an indeterminate handle to be cast and dereferenced.
    cl_event event = nullptr;
    cl_kernel clKernel = &kernel;

    static_cast<CommandQueueHw<FamilyType> *>(pCmdQ)->enqueueKernel(
        clKernel,
        dimensions,
        globalOffsets,
        workItems,
        nullptr,
        0,
        nullptr,
        &event);

    ASSERT_NE(nullptr, event);

    auto pEvent = static_cast<MockEvent<Event> *>(event);
    EXPECT_EQ(timeStampGpuAddress, pEvent->getHwTimeStampNode()->getGpuAddress());
    EXPECT_EQ(perfCountersGpuAddress, pEvent->getHwPerfCounterNode()->getGpuAddress());
    parseCommands<FamilyType>(*pCmdQ);

    // before WALKER:

    auto itor = expectStoreRegister<FamilyType>(cmdList.begin(), timeStampGpuAddress + offsetof(HwTimeStamps, ContextStartTS), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
    itor = expectStoreRegister<FamilyType>(itor, perfCountersGpuAddress + offsetof(HwPerfCounter, HWPerfCounters.DMAFenceIdBegin), INSTR_MMIO_NOOPID);
    itor = expectStoreRegister<FamilyType>(itor, perfCountersGpuAddress + offsetof(HwPerfCounter, HWPerfCounters.CoreFreqBegin), INSTR_MMIO_RPSTAT1);
    for (auto i = 0u; i < NEO::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT; i++) {
        itor = expectStoreRegister<FamilyType>(itor, perfCountersGpuAddress + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportBegin.Gp) + i * sizeof(cl_uint),
                                               INSTR_GFX_OFFSETS::INSTR_PERF_CNT_1_DW0 + i * sizeof(cl_uint));
    }
    itor = expectStoreRegister<FamilyType>(itor, perfCountersGpuAddress + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportBegin.User), 0);
    itor = expectStoreRegister<FamilyType>(itor, perfCountersGpuAddress + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportBegin.User) + 8, 0);
    itor = expectStoreRegister<FamilyType>(itor, perfCountersGpuAddress + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportBegin.User) + 12, 4);

    // after WALKER:

    itor = expectStoreRegister<FamilyType>(itor, timeStampGpuAddress + offsetof(HwTimeStamps, ContextEndTS), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
    itor = expectStoreRegister<FamilyType>(itor, perfCountersGpuAddress + offsetof(HwPerfCounter, HWPerfCounters.OaStatus), INSTR_GFX_OFFSETS::INSTR_OA_STATUS);
    itor = expectStoreRegister<FamilyType>(itor, perfCountersGpuAddress + offsetof(HwPerfCounter, HWPerfCounters.OaHead), INSTR_GFX_OFFSETS::INSTR_OA_HEAD_PTR);
    itor = expectStoreRegister<FamilyType>(itor, perfCountersGpuAddress + offsetof(HwPerfCounter, HWPerfCounters.OaTail), INSTR_GFX_OFFSETS::INSTR_OA_TAIL_PTR);
    for (auto i = 0u; i < NEO::INSTR_GENERAL_PURPOSE_COUNTERS_COUNT; i++) {
        itor = expectStoreRegister<FamilyType>(itor, perfCountersGpuAddress + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.Gp) + i * sizeof(cl_uint),
                                               INSTR_GFX_OFFSETS::INSTR_PERF_CNT_1_DW0 + i * sizeof(cl_uint));
    }
    itor = expectStoreRegister<FamilyType>(itor, perfCountersGpuAddress + offsetof(HwPerfCounter, HWPerfCounters.DMAFenceIdEnd), INSTR_MMIO_NOOPID);
    itor = expectStoreRegister<FamilyType>(itor, perfCountersGpuAddress + offsetof(HwPerfCounter, HWPerfCounters.CoreFreqEnd), INSTR_MMIO_RPSTAT1);
    itor = expectStoreRegister<FamilyType>(itor, perfCountersGpuAddress + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.User), 0);
    itor = expectStoreRegister<FamilyType>(itor, perfCountersGpuAddress + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.User) + 8, 0);
    itor = expectStoreRegister<FamilyType>(itor, perfCountersGpuAddress + offsetof(HwPerfCounter, HWPerfCounters.HwPerfReportEnd.User) + 12, 4);

    EXPECT_TRUE(pEvent->calcProfilingData());

    clReleaseEvent(event);

    // Restore the queue state for subsequent tests using the same fixture.
    pCmdQ->setPerfCountersEnabled(false, UINT32_MAX);
}
|
||||||
|
|
||||||
struct MockTimestampContainer : public TimestampPacketContainer {
|
struct MockTimestampContainer : public TimestampPacketContainer {
|
||||||
~MockTimestampContainer() override {
|
~MockTimestampContainer() override {
|
||||||
for (const auto &node : timestampPacketNodes) {
|
for (const auto &node : timestampPacketNodes) {
|
||||||
|
|||||||
Reference in New Issue
Block a user