mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-20 13:11:34 +08:00
Revert "Use GPU instead of CPU address in programming commands for HwTim(...)"
This reverts commit 6202b2222b.
"Use GPU instead of CPU address in programming commands for HwTimeStamps"
Change-Id: I085382d95538ae41068a21c628d606039bf9cdf0
This commit is contained in:
@ -146,7 +146,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||||||
auto devQueue = this->getContext().getDefaultDeviceQueue();
|
auto devQueue = this->getContext().getDefaultDeviceQueue();
|
||||||
DeviceQueueHw<GfxFamily> *devQueueHw = castToObject<DeviceQueueHw<GfxFamily>>(devQueue);
|
DeviceQueueHw<GfxFamily> *devQueueHw = castToObject<DeviceQueueHw<GfxFamily>>(devQueue);
|
||||||
|
|
||||||
TagNode<HwTimeStamps> *hwTimeStamps = nullptr;
|
HwTimeStamps *hwTimeStamps = nullptr;
|
||||||
|
|
||||||
auto commandStreamRecieverOwnership = getCommandStreamReceiver().obtainUniqueOwnership();
|
auto commandStreamRecieverOwnership = getCommandStreamReceiver().obtainUniqueOwnership();
|
||||||
|
|
||||||
@ -230,7 +230,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||||||
}
|
}
|
||||||
if (this->isProfilingEnabled()) {
|
if (this->isProfilingEnabled()) {
|
||||||
// Get allocation for timestamps
|
// Get allocation for timestamps
|
||||||
hwTimeStamps = eventBuilder.getEvent()->getHwTimeStampNode();
|
hwTimeStamps = eventBuilder.getEvent()->getHwTimeStampNode()->tag;
|
||||||
if (this->isPerfCountersEnabled()) {
|
if (this->isPerfCountersEnabled()) {
|
||||||
hwPerfCounter = eventBuilder.getEvent()->getHwPerfCounterNode()->tag;
|
hwPerfCounter = eventBuilder.getEvent()->getHwPerfCounterNode()->tag;
|
||||||
// PERF COUNTER: copy current configuration from queue to event
|
// PERF COUNTER: copy current configuration from queue to event
|
||||||
|
@ -139,11 +139,11 @@ class GpgpuWalkerHelper {
|
|||||||
const iOpenCL::SPatchThreadPayload &threadPayload);
|
const iOpenCL::SPatchThreadPayload &threadPayload);
|
||||||
|
|
||||||
static void dispatchProfilingCommandsStart(
|
static void dispatchProfilingCommandsStart(
|
||||||
TagNode<HwTimeStamps> &hwTimeStamps,
|
HwTimeStamps &hwTimeStamps,
|
||||||
OCLRT::LinearStream *commandStream);
|
OCLRT::LinearStream *commandStream);
|
||||||
|
|
||||||
static void dispatchProfilingCommandsEnd(
|
static void dispatchProfilingCommandsEnd(
|
||||||
TagNode<HwTimeStamps> &hwTimeStamps,
|
HwTimeStamps &hwTimeStamps,
|
||||||
OCLRT::LinearStream *commandStream);
|
OCLRT::LinearStream *commandStream);
|
||||||
|
|
||||||
static void dispatchPerfCountersNoopidRegisterCommands(
|
static void dispatchPerfCountersNoopidRegisterCommands(
|
||||||
|
@ -101,17 +101,17 @@ void GpgpuWalkerHelper<GfxFamily>::addAluReadModifyWriteRegister(
|
|||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
|
void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
|
||||||
TagNode<HwTimeStamps> &hwTimeStamps,
|
HwTimeStamps &hwTimeStamps,
|
||||||
OCLRT::LinearStream *commandStream) {
|
OCLRT::LinearStream *commandStream) {
|
||||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||||
|
|
||||||
// PIPE_CONTROL for global timestamp
|
// PIPE_CONTROL for global timestamp
|
||||||
uint64_t TimeStampAddress = hwTimeStamps.getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamps.tag->GlobalStartTS, hwTimeStamps.tag);
|
uint64_t TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.GlobalStartTS));
|
||||||
|
|
||||||
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, TimeStampAddress, 0llu);
|
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, TimeStampAddress, 0llu);
|
||||||
|
|
||||||
//MI_STORE_REGISTER_MEM for context local timestamp
|
//MI_STORE_REGISTER_MEM for context local timestamp
|
||||||
TimeStampAddress = hwTimeStamps.getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamps.tag->ContextStartTS, hwTimeStamps.tag);
|
TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.ContextStartTS));
|
||||||
|
|
||||||
//low part
|
//low part
|
||||||
auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||||
@ -122,7 +122,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
|
|||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
|
void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
|
||||||
TagNode<HwTimeStamps> &hwTimeStamps,
|
HwTimeStamps &hwTimeStamps,
|
||||||
OCLRT::LinearStream *commandStream) {
|
OCLRT::LinearStream *commandStream) {
|
||||||
|
|
||||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||||
@ -133,7 +133,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
|
|||||||
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||||
|
|
||||||
//MI_STORE_REGISTER_MEM for context local timestamp
|
//MI_STORE_REGISTER_MEM for context local timestamp
|
||||||
uint64_t TimeStampAddress = hwTimeStamps.getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamps.tag->ContextEndTS, hwTimeStamps.tag);
|
uint64_t TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.ContextEndTS));
|
||||||
|
|
||||||
//low part
|
//low part
|
||||||
auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||||
|
@ -40,7 +40,7 @@ class HardwareInterface {
|
|||||||
cl_uint numEventsInWaitList,
|
cl_uint numEventsInWaitList,
|
||||||
const cl_event *eventWaitList,
|
const cl_event *eventWaitList,
|
||||||
KernelOperation **blockedCommandsData,
|
KernelOperation **blockedCommandsData,
|
||||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
HwTimeStamps *hwTimeStamps,
|
||||||
HwPerfCounter *hwPerfCounter,
|
HwPerfCounter *hwPerfCounter,
|
||||||
TimestampPacketContainer *previousTimestampPacketNodes,
|
TimestampPacketContainer *previousTimestampPacketNodes,
|
||||||
TimestampPacketContainer *currentTimestampPacketNodes,
|
TimestampPacketContainer *currentTimestampPacketNodes,
|
||||||
@ -69,13 +69,13 @@ class HardwareInterface {
|
|||||||
static void dispatchProfilingPerfStartCommands(
|
static void dispatchProfilingPerfStartCommands(
|
||||||
const DispatchInfo &dispatchInfo,
|
const DispatchInfo &dispatchInfo,
|
||||||
const MultiDispatchInfo &multiDispatchInfo,
|
const MultiDispatchInfo &multiDispatchInfo,
|
||||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
HwTimeStamps *hwTimeStamps,
|
||||||
HwPerfCounter *hwPerfCounter,
|
HwPerfCounter *hwPerfCounter,
|
||||||
LinearStream *commandStream,
|
LinearStream *commandStream,
|
||||||
CommandQueue &commandQueue);
|
CommandQueue &commandQueue);
|
||||||
|
|
||||||
static void dispatchProfilingPerfEndCommands(
|
static void dispatchProfilingPerfEndCommands(
|
||||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
HwTimeStamps *hwTimeStamps,
|
||||||
HwPerfCounter *hwPerfCounter,
|
HwPerfCounter *hwPerfCounter,
|
||||||
LinearStream *commandStream,
|
LinearStream *commandStream,
|
||||||
CommandQueue &commandQueue);
|
CommandQueue &commandQueue);
|
||||||
|
@ -19,7 +19,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
|||||||
cl_uint numEventsInWaitList,
|
cl_uint numEventsInWaitList,
|
||||||
const cl_event *eventWaitList,
|
const cl_event *eventWaitList,
|
||||||
KernelOperation **blockedCommandsData,
|
KernelOperation **blockedCommandsData,
|
||||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
HwTimeStamps *hwTimeStamps,
|
||||||
HwPerfCounter *hwPerfCounter,
|
HwPerfCounter *hwPerfCounter,
|
||||||
TimestampPacketContainer *previousTimestampPacketNodes,
|
TimestampPacketContainer *previousTimestampPacketNodes,
|
||||||
TimestampPacketContainer *currentTimestampPacketNodes,
|
TimestampPacketContainer *currentTimestampPacketNodes,
|
||||||
|
@ -59,7 +59,7 @@ template <typename GfxFamily>
|
|||||||
inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfStartCommands(
|
inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfStartCommands(
|
||||||
const DispatchInfo &dispatchInfo,
|
const DispatchInfo &dispatchInfo,
|
||||||
const MultiDispatchInfo &multiDispatchInfo,
|
const MultiDispatchInfo &multiDispatchInfo,
|
||||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
HwTimeStamps *hwTimeStamps,
|
||||||
HwPerfCounter *hwPerfCounter,
|
HwPerfCounter *hwPerfCounter,
|
||||||
LinearStream *commandStream,
|
LinearStream *commandStream,
|
||||||
CommandQueue &commandQueue) {
|
CommandQueue &commandQueue) {
|
||||||
@ -77,7 +77,7 @@ inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfStartCommands(
|
|||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfEndCommands(
|
inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfEndCommands(
|
||||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
HwTimeStamps *hwTimeStamps,
|
||||||
HwPerfCounter *hwPerfCounter,
|
HwPerfCounter *hwPerfCounter,
|
||||||
LinearStream *commandStream,
|
LinearStream *commandStream,
|
||||||
CommandQueue &commandQueue) {
|
CommandQueue &commandQueue) {
|
||||||
|
@ -143,7 +143,7 @@ void DeviceQueue::initDeviceQueue() {
|
|||||||
igilEventPool->m_size = caps.maxOnDeviceEvents;
|
igilEventPool->m_size = caps.maxOnDeviceEvents;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp) {
|
void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp) {
|
||||||
setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentCount);
|
setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentCount);
|
||||||
addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
|
addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
|
||||||
}
|
}
|
||||||
@ -152,7 +152,7 @@ void DeviceQueue::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHea
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DeviceQueue::addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) {
|
void DeviceQueue::addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,8 +22,6 @@ class Event;
|
|||||||
struct MultiDispatchInfo;
|
struct MultiDispatchInfo;
|
||||||
class SchedulerKernel;
|
class SchedulerKernel;
|
||||||
struct HwTimeStamps;
|
struct HwTimeStamps;
|
||||||
template <class T>
|
|
||||||
struct TagNode;
|
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct OpenCLObjectMapper<_device_queue> {
|
struct OpenCLObjectMapper<_device_queue> {
|
||||||
@ -68,10 +66,10 @@ class DeviceQueue : public BaseObject<_device_queue> {
|
|||||||
size_t paramValueSize, void *paramValue,
|
size_t paramValueSize, void *paramValue,
|
||||||
size_t *paramValueSizeRet);
|
size_t *paramValueSizeRet);
|
||||||
|
|
||||||
void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp);
|
void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp);
|
||||||
|
|
||||||
virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount);
|
virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount);
|
||||||
virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount);
|
virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount);
|
||||||
|
|
||||||
MOCKABLE_VIRTUAL bool isEMCriticalSectionFree() {
|
MOCKABLE_VIRTUAL bool isEMCriticalSectionFree() {
|
||||||
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
|
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
|
||||||
|
@ -55,7 +55,7 @@ class DeviceQueueHw : public DeviceQueue {
|
|||||||
|
|
||||||
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override;
|
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override;
|
||||||
|
|
||||||
void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) override;
|
void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override;
|
||||||
void resetDeviceQueue() override;
|
void resetDeviceQueue() override;
|
||||||
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override;
|
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override;
|
||||||
|
|
||||||
|
@ -12,7 +12,6 @@
|
|||||||
#include "runtime/helpers/preamble.h"
|
#include "runtime/helpers/preamble.h"
|
||||||
#include "runtime/helpers/string.h"
|
#include "runtime/helpers/string.h"
|
||||||
#include "runtime/memory_manager/memory_manager.h"
|
#include "runtime/memory_manager/memory_manager.h"
|
||||||
#include "runtime/utilities/tag_allocator.h"
|
|
||||||
|
|
||||||
namespace OCLRT {
|
namespace OCLRT {
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
@ -202,7 +201,7 @@ void DeviceQueueHw<GfxFamily>::buildSlbDummyCommands() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) {
|
void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) {
|
||||||
// CleanUp Section
|
// CleanUp Section
|
||||||
auto offset = slbCS.getUsed();
|
auto offset = slbCS.getUsed();
|
||||||
auto alignmentSize = alignUp(offset, MemoryConstants::pageSize) - offset;
|
auto alignmentSize = alignUp(offset, MemoryConstants::pageSize) - offset;
|
||||||
@ -216,7 +215,7 @@ void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKer
|
|||||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||||
|
|
||||||
if (hwTimeStamp != nullptr) {
|
if (hwTimeStamp != nullptr) {
|
||||||
uint64_t TimeStampAddress = hwTimeStamp->getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamp->tag->ContextCompleteTS, hwTimeStamp->tag);
|
uint64_t TimeStampAddress = (uint64_t)((uintptr_t) & (hwTimeStamp->ContextCompleteTS));
|
||||||
igilQueue->m_controls.m_EventTimestampAddress = TimeStampAddress;
|
igilQueue->m_controls.m_EventTimestampAddress = TimeStampAddress;
|
||||||
|
|
||||||
addProfilingEndCmds(TimeStampAddress);
|
addProfilingEndCmds(TimeStampAddress);
|
||||||
|
@ -451,7 +451,7 @@ void Event::submitCommand(bool abortTasks) {
|
|||||||
if ((this->isProfilingEnabled()) && (this->cmdQueue != nullptr)) {
|
if ((this->isProfilingEnabled()) && (this->cmdQueue != nullptr)) {
|
||||||
if (timeStampNode) {
|
if (timeStampNode) {
|
||||||
this->cmdQueue->getCommandStreamReceiver().makeResident(*timeStampNode->getGraphicsAllocation());
|
this->cmdQueue->getCommandStreamReceiver().makeResident(*timeStampNode->getGraphicsAllocation());
|
||||||
cmdToProcess->timestamp = timeStampNode;
|
cmdToProcess->timestamp = timeStampNode->tag;
|
||||||
}
|
}
|
||||||
if (profilingCpuPath) {
|
if (profilingCpuPath) {
|
||||||
setSubmitTimeStamp();
|
setSubmitTimeStamp();
|
||||||
|
@ -26,8 +26,6 @@ class Surface;
|
|||||||
class PrintfHandler;
|
class PrintfHandler;
|
||||||
struct HwTimeStamps;
|
struct HwTimeStamps;
|
||||||
class TimestampPacketContainer;
|
class TimestampPacketContainer;
|
||||||
template <class T>
|
|
||||||
struct TagNode;
|
|
||||||
|
|
||||||
enum MapOperationType {
|
enum MapOperationType {
|
||||||
MAP,
|
MAP,
|
||||||
@ -44,7 +42,7 @@ class Command : public IFNode<Command> {
|
|||||||
virtual LinearStream *getCommandStream() {
|
virtual LinearStream *getCommandStream() {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
TagNode<HwTimeStamps> *timestamp = nullptr;
|
HwTimeStamps *timestamp = nullptr;
|
||||||
CompletionStamp completionStamp = {};
|
CompletionStamp completionStamp = {};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -7,7 +7,6 @@
|
|||||||
|
|
||||||
#include "hw_cmds.h"
|
#include "hw_cmds.h"
|
||||||
#include "runtime/helpers/options.h"
|
#include "runtime/helpers/options.h"
|
||||||
#include "runtime/utilities/tag_allocator.h"
|
|
||||||
#include "unit_tests/fixtures/device_host_queue_fixture.h"
|
#include "unit_tests/fixtures/device_host_queue_fixture.h"
|
||||||
#include "unit_tests/fixtures/execution_model_fixture.h"
|
#include "unit_tests/fixtures/execution_model_fixture.h"
|
||||||
#include "unit_tests/helpers/hw_parse.h"
|
#include "unit_tests/helpers/hw_parse.h"
|
||||||
@ -353,19 +352,18 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, AddEMCleanupSectionWithProfiling) {
|
|||||||
MockParentKernel *mockParentKernel = MockParentKernel::create(*pContext);
|
MockParentKernel *mockParentKernel = MockParentKernel::create(*pContext);
|
||||||
uint32_t taskCount = 7;
|
uint32_t taskCount = 7;
|
||||||
|
|
||||||
auto hwTimeStamp = pCommandQueue->getCommandStreamReceiver().getEventTsAllocator()->getTag();
|
HwTimeStamps hwTimeStamp;
|
||||||
mockDeviceQueueHw->buildSlbDummyCommands();
|
mockDeviceQueueHw->buildSlbDummyCommands();
|
||||||
mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel, hwTimeStamp, taskCount);
|
mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel, &hwTimeStamp, taskCount);
|
||||||
|
|
||||||
uint32_t eventTimestampAddrLow = static_cast<uint32_t>(igilCmdQueue->m_controls.m_EventTimestampAddress & 0xFFFFFFFF);
|
uint32_t eventTimestampLow = (uint32_t)(igilCmdQueue->m_controls.m_EventTimestampAddress & 0xFFFFFFFF);
|
||||||
uint32_t eventTimestampAddrHigh = static_cast<uint32_t>((igilCmdQueue->m_controls.m_EventTimestampAddress & 0xFFFFFFFF00000000) >> 32);
|
uint32_t eventTimestampHigh = (uint32_t)((igilCmdQueue->m_controls.m_EventTimestampAddress & 0xFFFFFFFF00000000) >> 32);
|
||||||
|
|
||||||
uint64_t contextCompleteAddr = hwTimeStamp->getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamp->tag->ContextCompleteTS, hwTimeStamp->tag);
|
uint32_t contextCompleteLow = (uint32_t)((uint64_t)((uintptr_t)(&hwTimeStamp.ContextCompleteTS)) & 0xFFFFFFFF);
|
||||||
uint32_t contextCompleteAddrLow = static_cast<uint32_t>(contextCompleteAddr & 0xFFFFFFFF);
|
uint32_t contextCompleteHigh = (uint32_t)(((uint64_t)((uintptr_t)(&hwTimeStamp.ContextCompleteTS)) & 0xFFFFFFFF00000000) >> 32);
|
||||||
uint32_t contextCompleteAddrHigh = static_cast<uint32_t>((contextCompleteAddr & 0xFFFFFFFF00000000) >> 32);
|
|
||||||
|
|
||||||
EXPECT_EQ(contextCompleteAddrLow, eventTimestampAddrLow);
|
EXPECT_EQ(contextCompleteLow, eventTimestampLow);
|
||||||
EXPECT_EQ(contextCompleteAddrHigh, eventTimestampAddrHigh);
|
EXPECT_EQ(contextCompleteHigh, eventTimestampHigh);
|
||||||
|
|
||||||
HardwareParse hwParser;
|
HardwareParse hwParser;
|
||||||
auto *slbCS = mockDeviceQueueHw->getSlbCS();
|
auto *slbCS = mockDeviceQueueHw->getSlbCS();
|
||||||
|
@ -8,7 +8,6 @@
|
|||||||
#include "runtime/command_queue/gpgpu_walker.h"
|
#include "runtime/command_queue/gpgpu_walker.h"
|
||||||
#include "runtime/command_queue/hardware_interface.h"
|
#include "runtime/command_queue/hardware_interface.h"
|
||||||
#include "runtime/event/hw_timestamps.h"
|
#include "runtime/event/hw_timestamps.h"
|
||||||
#include "runtime/utilities/tag_allocator.h"
|
|
||||||
#include "runtime/helpers/kernel_commands.h"
|
#include "runtime/helpers/kernel_commands.h"
|
||||||
#include "runtime/helpers/task_information.h"
|
#include "runtime/helpers/task_information.h"
|
||||||
#include "unit_tests/mocks/mock_command_queue.h"
|
#include "unit_tests/mocks/mock_command_queue.h"
|
||||||
@ -57,9 +56,9 @@ class MockDeviceQueueHwWithCriticalSectionRelease : public DeviceQueueHw<GfxFami
|
|||||||
indirectStateSetup = true;
|
indirectStateSetup = true;
|
||||||
return BaseClass::setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentIDCount);
|
return BaseClass::setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentIDCount);
|
||||||
}
|
}
|
||||||
void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) override {
|
void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override {
|
||||||
cleanupSectionAdded = true;
|
cleanupSectionAdded = true;
|
||||||
timestampAddedInCleanupSection = hwTimeStamp ? hwTimeStamp->tag : nullptr;
|
timestampAddedInCleanupSection = hwTimeStamp;
|
||||||
return BaseClass::addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
|
return BaseClass::addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
|
||||||
}
|
}
|
||||||
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override {
|
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override {
|
||||||
@ -250,12 +249,13 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenBlockedParentKernelWithProfilingW
|
|||||||
std::vector<Surface *> surfaces;
|
std::vector<Surface *> surfaces;
|
||||||
auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, std::unique_ptr<KernelOperation>(blockedCommandData), surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
|
auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, std::unique_ptr<KernelOperation>(blockedCommandData), surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
|
||||||
|
|
||||||
auto timestamp = pCmdQ->getCommandStreamReceiver().getEventTsAllocator()->getTag();
|
HwTimeStamps timestamp;
|
||||||
cmdComputeKernel->timestamp = timestamp;
|
|
||||||
|
cmdComputeKernel->timestamp = &timestamp;
|
||||||
cmdComputeKernel->submit(0, false);
|
cmdComputeKernel->submit(0, false);
|
||||||
|
|
||||||
EXPECT_TRUE(mockDevQueue.cleanupSectionAdded);
|
EXPECT_TRUE(mockDevQueue.cleanupSectionAdded);
|
||||||
EXPECT_EQ(mockDevQueue.timestampAddedInCleanupSection, timestamp->tag);
|
EXPECT_EQ(mockDevQueue.timestampAddedInCleanupSection, &timestamp);
|
||||||
|
|
||||||
delete cmdComputeKernel;
|
delete cmdComputeKernel;
|
||||||
delete parentKernel;
|
delete parentKernel;
|
||||||
|
Reference in New Issue
Block a user