Revert "Use GPU instead of CPU address in programming commands for HwTimeStamps"

This reverts commit 6202b2222b ("Use GPU instead of CPU address in programming commands for HwTimeStamps").

Change-Id: I085382d95538ae41068a21c628d606039bf9cdf0
This commit is contained in:
Pawel Wilma
2018-12-20 16:32:47 +01:00
committed by sys_ocldev
parent 1e011f9a08
commit cc1f4bed60
14 changed files with 38 additions and 45 deletions

View File

@ -146,7 +146,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
auto devQueue = this->getContext().getDefaultDeviceQueue(); auto devQueue = this->getContext().getDefaultDeviceQueue();
DeviceQueueHw<GfxFamily> *devQueueHw = castToObject<DeviceQueueHw<GfxFamily>>(devQueue); DeviceQueueHw<GfxFamily> *devQueueHw = castToObject<DeviceQueueHw<GfxFamily>>(devQueue);
TagNode<HwTimeStamps> *hwTimeStamps = nullptr; HwTimeStamps *hwTimeStamps = nullptr;
auto commandStreamRecieverOwnership = getCommandStreamReceiver().obtainUniqueOwnership(); auto commandStreamRecieverOwnership = getCommandStreamReceiver().obtainUniqueOwnership();
@ -230,7 +230,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
} }
if (this->isProfilingEnabled()) { if (this->isProfilingEnabled()) {
// Get allocation for timestamps // Get allocation for timestamps
hwTimeStamps = eventBuilder.getEvent()->getHwTimeStampNode(); hwTimeStamps = eventBuilder.getEvent()->getHwTimeStampNode()->tag;
if (this->isPerfCountersEnabled()) { if (this->isPerfCountersEnabled()) {
hwPerfCounter = eventBuilder.getEvent()->getHwPerfCounterNode()->tag; hwPerfCounter = eventBuilder.getEvent()->getHwPerfCounterNode()->tag;
// PERF COUNTER: copy current configuration from queue to event // PERF COUNTER: copy current configuration from queue to event

View File

@ -139,11 +139,11 @@ class GpgpuWalkerHelper {
const iOpenCL::SPatchThreadPayload &threadPayload); const iOpenCL::SPatchThreadPayload &threadPayload);
static void dispatchProfilingCommandsStart( static void dispatchProfilingCommandsStart(
TagNode<HwTimeStamps> &hwTimeStamps, HwTimeStamps &hwTimeStamps,
OCLRT::LinearStream *commandStream); OCLRT::LinearStream *commandStream);
static void dispatchProfilingCommandsEnd( static void dispatchProfilingCommandsEnd(
TagNode<HwTimeStamps> &hwTimeStamps, HwTimeStamps &hwTimeStamps,
OCLRT::LinearStream *commandStream); OCLRT::LinearStream *commandStream);
static void dispatchPerfCountersNoopidRegisterCommands( static void dispatchPerfCountersNoopidRegisterCommands(

View File

@ -101,17 +101,17 @@ void GpgpuWalkerHelper<GfxFamily>::addAluReadModifyWriteRegister(
template <typename GfxFamily> template <typename GfxFamily>
void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart( void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
TagNode<HwTimeStamps> &hwTimeStamps, HwTimeStamps &hwTimeStamps,
OCLRT::LinearStream *commandStream) { OCLRT::LinearStream *commandStream) {
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
// PIPE_CONTROL for global timestamp // PIPE_CONTROL for global timestamp
uint64_t TimeStampAddress = hwTimeStamps.getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamps.tag->GlobalStartTS, hwTimeStamps.tag); uint64_t TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.GlobalStartTS));
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, TimeStampAddress, 0llu); PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, TimeStampAddress, 0llu);
//MI_STORE_REGISTER_MEM for context local timestamp //MI_STORE_REGISTER_MEM for context local timestamp
TimeStampAddress = hwTimeStamps.getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamps.tag->ContextStartTS, hwTimeStamps.tag); TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.ContextStartTS));
//low part //low part
auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM)); auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
@ -122,7 +122,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
template <typename GfxFamily> template <typename GfxFamily>
void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd( void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
TagNode<HwTimeStamps> &hwTimeStamps, HwTimeStamps &hwTimeStamps,
OCLRT::LinearStream *commandStream) { OCLRT::LinearStream *commandStream) {
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
@ -133,7 +133,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
pPipeControlCmd->setCommandStreamerStallEnable(true); pPipeControlCmd->setCommandStreamerStallEnable(true);
//MI_STORE_REGISTER_MEM for context local timestamp //MI_STORE_REGISTER_MEM for context local timestamp
uint64_t TimeStampAddress = hwTimeStamps.getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamps.tag->ContextEndTS, hwTimeStamps.tag); uint64_t TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.ContextEndTS));
//low part //low part
auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM)); auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));

View File

@ -40,7 +40,7 @@ class HardwareInterface {
cl_uint numEventsInWaitList, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, const cl_event *eventWaitList,
KernelOperation **blockedCommandsData, KernelOperation **blockedCommandsData,
TagNode<HwTimeStamps> *hwTimeStamps, HwTimeStamps *hwTimeStamps,
HwPerfCounter *hwPerfCounter, HwPerfCounter *hwPerfCounter,
TimestampPacketContainer *previousTimestampPacketNodes, TimestampPacketContainer *previousTimestampPacketNodes,
TimestampPacketContainer *currentTimestampPacketNodes, TimestampPacketContainer *currentTimestampPacketNodes,
@ -69,13 +69,13 @@ class HardwareInterface {
static void dispatchProfilingPerfStartCommands( static void dispatchProfilingPerfStartCommands(
const DispatchInfo &dispatchInfo, const DispatchInfo &dispatchInfo,
const MultiDispatchInfo &multiDispatchInfo, const MultiDispatchInfo &multiDispatchInfo,
TagNode<HwTimeStamps> *hwTimeStamps, HwTimeStamps *hwTimeStamps,
HwPerfCounter *hwPerfCounter, HwPerfCounter *hwPerfCounter,
LinearStream *commandStream, LinearStream *commandStream,
CommandQueue &commandQueue); CommandQueue &commandQueue);
static void dispatchProfilingPerfEndCommands( static void dispatchProfilingPerfEndCommands(
TagNode<HwTimeStamps> *hwTimeStamps, HwTimeStamps *hwTimeStamps,
HwPerfCounter *hwPerfCounter, HwPerfCounter *hwPerfCounter,
LinearStream *commandStream, LinearStream *commandStream,
CommandQueue &commandQueue); CommandQueue &commandQueue);

View File

@ -19,7 +19,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
cl_uint numEventsInWaitList, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, const cl_event *eventWaitList,
KernelOperation **blockedCommandsData, KernelOperation **blockedCommandsData,
TagNode<HwTimeStamps> *hwTimeStamps, HwTimeStamps *hwTimeStamps,
HwPerfCounter *hwPerfCounter, HwPerfCounter *hwPerfCounter,
TimestampPacketContainer *previousTimestampPacketNodes, TimestampPacketContainer *previousTimestampPacketNodes,
TimestampPacketContainer *currentTimestampPacketNodes, TimestampPacketContainer *currentTimestampPacketNodes,

View File

@ -59,7 +59,7 @@ template <typename GfxFamily>
inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfStartCommands( inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfStartCommands(
const DispatchInfo &dispatchInfo, const DispatchInfo &dispatchInfo,
const MultiDispatchInfo &multiDispatchInfo, const MultiDispatchInfo &multiDispatchInfo,
TagNode<HwTimeStamps> *hwTimeStamps, HwTimeStamps *hwTimeStamps,
HwPerfCounter *hwPerfCounter, HwPerfCounter *hwPerfCounter,
LinearStream *commandStream, LinearStream *commandStream,
CommandQueue &commandQueue) { CommandQueue &commandQueue) {
@ -77,7 +77,7 @@ inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfStartCommands(
template <typename GfxFamily> template <typename GfxFamily>
inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfEndCommands( inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfEndCommands(
TagNode<HwTimeStamps> *hwTimeStamps, HwTimeStamps *hwTimeStamps,
HwPerfCounter *hwPerfCounter, HwPerfCounter *hwPerfCounter,
LinearStream *commandStream, LinearStream *commandStream,
CommandQueue &commandQueue) { CommandQueue &commandQueue) {

View File

@ -143,7 +143,7 @@ void DeviceQueue::initDeviceQueue() {
igilEventPool->m_size = caps.maxOnDeviceEvents; igilEventPool->m_size = caps.maxOnDeviceEvents;
} }
void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp) { void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp) {
setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentCount); setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentCount);
addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount); addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
} }
@ -152,7 +152,7 @@ void DeviceQueue::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHea
return; return;
} }
void DeviceQueue::addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) { void DeviceQueue::addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) {
return; return;
} }

View File

@ -22,8 +22,6 @@ class Event;
struct MultiDispatchInfo; struct MultiDispatchInfo;
class SchedulerKernel; class SchedulerKernel;
struct HwTimeStamps; struct HwTimeStamps;
template <class T>
struct TagNode;
template <> template <>
struct OpenCLObjectMapper<_device_queue> { struct OpenCLObjectMapper<_device_queue> {
@ -68,10 +66,10 @@ class DeviceQueue : public BaseObject<_device_queue> {
size_t paramValueSize, void *paramValue, size_t paramValueSize, void *paramValue,
size_t *paramValueSizeRet); size_t *paramValueSizeRet);
void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp); void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp);
virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount); virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount);
virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount); virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount);
MOCKABLE_VIRTUAL bool isEMCriticalSectionFree() { MOCKABLE_VIRTUAL bool isEMCriticalSectionFree() {
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer()); auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());

View File

@ -55,7 +55,7 @@ class DeviceQueueHw : public DeviceQueue {
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override; void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override;
void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) override; void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override;
void resetDeviceQueue() override; void resetDeviceQueue() override;
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override; void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override;

View File

@ -12,7 +12,6 @@
#include "runtime/helpers/preamble.h" #include "runtime/helpers/preamble.h"
#include "runtime/helpers/string.h" #include "runtime/helpers/string.h"
#include "runtime/memory_manager/memory_manager.h" #include "runtime/memory_manager/memory_manager.h"
#include "runtime/utilities/tag_allocator.h"
namespace OCLRT { namespace OCLRT {
template <typename GfxFamily> template <typename GfxFamily>
@ -202,7 +201,7 @@ void DeviceQueueHw<GfxFamily>::buildSlbDummyCommands() {
} }
template <typename GfxFamily> template <typename GfxFamily>
void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) { void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) {
// CleanUp Section // CleanUp Section
auto offset = slbCS.getUsed(); auto offset = slbCS.getUsed();
auto alignmentSize = alignUp(offset, MemoryConstants::pageSize) - offset; auto alignmentSize = alignUp(offset, MemoryConstants::pageSize) - offset;
@ -216,7 +215,7 @@ void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKer
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
if (hwTimeStamp != nullptr) { if (hwTimeStamp != nullptr) {
uint64_t TimeStampAddress = hwTimeStamp->getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamp->tag->ContextCompleteTS, hwTimeStamp->tag); uint64_t TimeStampAddress = (uint64_t)((uintptr_t) & (hwTimeStamp->ContextCompleteTS));
igilQueue->m_controls.m_EventTimestampAddress = TimeStampAddress; igilQueue->m_controls.m_EventTimestampAddress = TimeStampAddress;
addProfilingEndCmds(TimeStampAddress); addProfilingEndCmds(TimeStampAddress);

View File

@ -451,7 +451,7 @@ void Event::submitCommand(bool abortTasks) {
if ((this->isProfilingEnabled()) && (this->cmdQueue != nullptr)) { if ((this->isProfilingEnabled()) && (this->cmdQueue != nullptr)) {
if (timeStampNode) { if (timeStampNode) {
this->cmdQueue->getCommandStreamReceiver().makeResident(*timeStampNode->getGraphicsAllocation()); this->cmdQueue->getCommandStreamReceiver().makeResident(*timeStampNode->getGraphicsAllocation());
cmdToProcess->timestamp = timeStampNode; cmdToProcess->timestamp = timeStampNode->tag;
} }
if (profilingCpuPath) { if (profilingCpuPath) {
setSubmitTimeStamp(); setSubmitTimeStamp();

View File

@ -26,8 +26,6 @@ class Surface;
class PrintfHandler; class PrintfHandler;
struct HwTimeStamps; struct HwTimeStamps;
class TimestampPacketContainer; class TimestampPacketContainer;
template <class T>
struct TagNode;
enum MapOperationType { enum MapOperationType {
MAP, MAP,
@ -44,7 +42,7 @@ class Command : public IFNode<Command> {
virtual LinearStream *getCommandStream() { virtual LinearStream *getCommandStream() {
return nullptr; return nullptr;
} }
TagNode<HwTimeStamps> *timestamp = nullptr; HwTimeStamps *timestamp = nullptr;
CompletionStamp completionStamp = {}; CompletionStamp completionStamp = {};
}; };

View File

@ -7,7 +7,6 @@
#include "hw_cmds.h" #include "hw_cmds.h"
#include "runtime/helpers/options.h" #include "runtime/helpers/options.h"
#include "runtime/utilities/tag_allocator.h"
#include "unit_tests/fixtures/device_host_queue_fixture.h" #include "unit_tests/fixtures/device_host_queue_fixture.h"
#include "unit_tests/fixtures/execution_model_fixture.h" #include "unit_tests/fixtures/execution_model_fixture.h"
#include "unit_tests/helpers/hw_parse.h" #include "unit_tests/helpers/hw_parse.h"
@ -353,19 +352,18 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, AddEMCleanupSectionWithProfiling) {
MockParentKernel *mockParentKernel = MockParentKernel::create(*pContext); MockParentKernel *mockParentKernel = MockParentKernel::create(*pContext);
uint32_t taskCount = 7; uint32_t taskCount = 7;
auto hwTimeStamp = pCommandQueue->getCommandStreamReceiver().getEventTsAllocator()->getTag(); HwTimeStamps hwTimeStamp;
mockDeviceQueueHw->buildSlbDummyCommands(); mockDeviceQueueHw->buildSlbDummyCommands();
mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel, hwTimeStamp, taskCount); mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel, &hwTimeStamp, taskCount);
uint32_t eventTimestampAddrLow = static_cast<uint32_t>(igilCmdQueue->m_controls.m_EventTimestampAddress & 0xFFFFFFFF); uint32_t eventTimestampLow = (uint32_t)(igilCmdQueue->m_controls.m_EventTimestampAddress & 0xFFFFFFFF);
uint32_t eventTimestampAddrHigh = static_cast<uint32_t>((igilCmdQueue->m_controls.m_EventTimestampAddress & 0xFFFFFFFF00000000) >> 32); uint32_t eventTimestampHigh = (uint32_t)((igilCmdQueue->m_controls.m_EventTimestampAddress & 0xFFFFFFFF00000000) >> 32);
uint64_t contextCompleteAddr = hwTimeStamp->getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamp->tag->ContextCompleteTS, hwTimeStamp->tag); uint32_t contextCompleteLow = (uint32_t)((uint64_t)((uintptr_t)(&hwTimeStamp.ContextCompleteTS)) & 0xFFFFFFFF);
uint32_t contextCompleteAddrLow = static_cast<uint32_t>(contextCompleteAddr & 0xFFFFFFFF); uint32_t contextCompleteHigh = (uint32_t)(((uint64_t)((uintptr_t)(&hwTimeStamp.ContextCompleteTS)) & 0xFFFFFFFF00000000) >> 32);
uint32_t contextCompleteAddrHigh = static_cast<uint32_t>((contextCompleteAddr & 0xFFFFFFFF00000000) >> 32);
EXPECT_EQ(contextCompleteAddrLow, eventTimestampAddrLow); EXPECT_EQ(contextCompleteLow, eventTimestampLow);
EXPECT_EQ(contextCompleteAddrHigh, eventTimestampAddrHigh); EXPECT_EQ(contextCompleteHigh, eventTimestampHigh);
HardwareParse hwParser; HardwareParse hwParser;
auto *slbCS = mockDeviceQueueHw->getSlbCS(); auto *slbCS = mockDeviceQueueHw->getSlbCS();

View File

@ -8,7 +8,6 @@
#include "runtime/command_queue/gpgpu_walker.h" #include "runtime/command_queue/gpgpu_walker.h"
#include "runtime/command_queue/hardware_interface.h" #include "runtime/command_queue/hardware_interface.h"
#include "runtime/event/hw_timestamps.h" #include "runtime/event/hw_timestamps.h"
#include "runtime/utilities/tag_allocator.h"
#include "runtime/helpers/kernel_commands.h" #include "runtime/helpers/kernel_commands.h"
#include "runtime/helpers/task_information.h" #include "runtime/helpers/task_information.h"
#include "unit_tests/mocks/mock_command_queue.h" #include "unit_tests/mocks/mock_command_queue.h"
@ -57,9 +56,9 @@ class MockDeviceQueueHwWithCriticalSectionRelease : public DeviceQueueHw<GfxFami
indirectStateSetup = true; indirectStateSetup = true;
return BaseClass::setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentIDCount); return BaseClass::setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentIDCount);
} }
void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) override { void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override {
cleanupSectionAdded = true; cleanupSectionAdded = true;
timestampAddedInCleanupSection = hwTimeStamp ? hwTimeStamp->tag : nullptr; timestampAddedInCleanupSection = hwTimeStamp;
return BaseClass::addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount); return BaseClass::addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
} }
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override { void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override {
@ -250,12 +249,13 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenBlockedParentKernelWithProfilingW
std::vector<Surface *> surfaces; std::vector<Surface *> surfaces;
auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, std::unique_ptr<KernelOperation>(blockedCommandData), surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1); auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, std::unique_ptr<KernelOperation>(blockedCommandData), surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
auto timestamp = pCmdQ->getCommandStreamReceiver().getEventTsAllocator()->getTag(); HwTimeStamps timestamp;
cmdComputeKernel->timestamp = timestamp;
cmdComputeKernel->timestamp = &timestamp;
cmdComputeKernel->submit(0, false); cmdComputeKernel->submit(0, false);
EXPECT_TRUE(mockDevQueue.cleanupSectionAdded); EXPECT_TRUE(mockDevQueue.cleanupSectionAdded);
EXPECT_EQ(mockDevQueue.timestampAddedInCleanupSection, timestamp->tag); EXPECT_EQ(mockDevQueue.timestampAddedInCleanupSection, &timestamp);
delete cmdComputeKernel; delete cmdComputeKernel;
delete parentKernel; delete parentKernel;