Revert "Use GPU instead of CPU address in programming commands for HwTim(...)"
This reverts commit 6202b2222b.
"Use GPU instead of CPU address in programming commands for HwTimeStamps"
Change-Id: I085382d95538ae41068a21c628d606039bf9cdf0
This commit is contained in:
parent
1e011f9a08
commit
cc1f4bed60
|
@ -146,7 +146,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||
auto devQueue = this->getContext().getDefaultDeviceQueue();
|
||||
DeviceQueueHw<GfxFamily> *devQueueHw = castToObject<DeviceQueueHw<GfxFamily>>(devQueue);
|
||||
|
||||
TagNode<HwTimeStamps> *hwTimeStamps = nullptr;
|
||||
HwTimeStamps *hwTimeStamps = nullptr;
|
||||
|
||||
auto commandStreamRecieverOwnership = getCommandStreamReceiver().obtainUniqueOwnership();
|
||||
|
||||
|
@ -230,7 +230,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
|||
}
|
||||
if (this->isProfilingEnabled()) {
|
||||
// Get allocation for timestamps
|
||||
hwTimeStamps = eventBuilder.getEvent()->getHwTimeStampNode();
|
||||
hwTimeStamps = eventBuilder.getEvent()->getHwTimeStampNode()->tag;
|
||||
if (this->isPerfCountersEnabled()) {
|
||||
hwPerfCounter = eventBuilder.getEvent()->getHwPerfCounterNode()->tag;
|
||||
// PERF COUNTER: copy current configuration from queue to event
|
||||
|
|
|
@ -139,11 +139,11 @@ class GpgpuWalkerHelper {
|
|||
const iOpenCL::SPatchThreadPayload &threadPayload);
|
||||
|
||||
static void dispatchProfilingCommandsStart(
|
||||
TagNode<HwTimeStamps> &hwTimeStamps,
|
||||
HwTimeStamps &hwTimeStamps,
|
||||
OCLRT::LinearStream *commandStream);
|
||||
|
||||
static void dispatchProfilingCommandsEnd(
|
||||
TagNode<HwTimeStamps> &hwTimeStamps,
|
||||
HwTimeStamps &hwTimeStamps,
|
||||
OCLRT::LinearStream *commandStream);
|
||||
|
||||
static void dispatchPerfCountersNoopidRegisterCommands(
|
||||
|
|
|
@ -101,17 +101,17 @@ void GpgpuWalkerHelper<GfxFamily>::addAluReadModifyWriteRegister(
|
|||
|
||||
template <typename GfxFamily>
|
||||
void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
|
||||
TagNode<HwTimeStamps> &hwTimeStamps,
|
||||
HwTimeStamps &hwTimeStamps,
|
||||
OCLRT::LinearStream *commandStream) {
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
|
||||
// PIPE_CONTROL for global timestamp
|
||||
uint64_t TimeStampAddress = hwTimeStamps.getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamps.tag->GlobalStartTS, hwTimeStamps.tag);
|
||||
uint64_t TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.GlobalStartTS));
|
||||
|
||||
PipeControlHelper<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, TimeStampAddress, 0llu);
|
||||
|
||||
//MI_STORE_REGISTER_MEM for context local timestamp
|
||||
TimeStampAddress = hwTimeStamps.getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamps.tag->ContextStartTS, hwTimeStamps.tag);
|
||||
TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.ContextStartTS));
|
||||
|
||||
//low part
|
||||
auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||
|
@ -122,7 +122,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
|
|||
|
||||
template <typename GfxFamily>
|
||||
void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
|
||||
TagNode<HwTimeStamps> &hwTimeStamps,
|
||||
HwTimeStamps &hwTimeStamps,
|
||||
OCLRT::LinearStream *commandStream) {
|
||||
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
|
@ -133,7 +133,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
|
|||
pPipeControlCmd->setCommandStreamerStallEnable(true);
|
||||
|
||||
//MI_STORE_REGISTER_MEM for context local timestamp
|
||||
uint64_t TimeStampAddress = hwTimeStamps.getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamps.tag->ContextEndTS, hwTimeStamps.tag);
|
||||
uint64_t TimeStampAddress = reinterpret_cast<uint64_t>(&(hwTimeStamps.ContextEndTS));
|
||||
|
||||
//low part
|
||||
auto pMICmdLow = (MI_STORE_REGISTER_MEM *)commandStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||
|
|
|
@ -40,7 +40,7 @@ class HardwareInterface {
|
|||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
KernelOperation **blockedCommandsData,
|
||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
||||
HwTimeStamps *hwTimeStamps,
|
||||
HwPerfCounter *hwPerfCounter,
|
||||
TimestampPacketContainer *previousTimestampPacketNodes,
|
||||
TimestampPacketContainer *currentTimestampPacketNodes,
|
||||
|
@ -69,13 +69,13 @@ class HardwareInterface {
|
|||
static void dispatchProfilingPerfStartCommands(
|
||||
const DispatchInfo &dispatchInfo,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
||||
HwTimeStamps *hwTimeStamps,
|
||||
HwPerfCounter *hwPerfCounter,
|
||||
LinearStream *commandStream,
|
||||
CommandQueue &commandQueue);
|
||||
|
||||
static void dispatchProfilingPerfEndCommands(
|
||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
||||
HwTimeStamps *hwTimeStamps,
|
||||
HwPerfCounter *hwPerfCounter,
|
||||
LinearStream *commandStream,
|
||||
CommandQueue &commandQueue);
|
||||
|
|
|
@ -19,7 +19,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
|||
cl_uint numEventsInWaitList,
|
||||
const cl_event *eventWaitList,
|
||||
KernelOperation **blockedCommandsData,
|
||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
||||
HwTimeStamps *hwTimeStamps,
|
||||
HwPerfCounter *hwPerfCounter,
|
||||
TimestampPacketContainer *previousTimestampPacketNodes,
|
||||
TimestampPacketContainer *currentTimestampPacketNodes,
|
||||
|
|
|
@ -59,7 +59,7 @@ template <typename GfxFamily>
|
|||
inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfStartCommands(
|
||||
const DispatchInfo &dispatchInfo,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
||||
HwTimeStamps *hwTimeStamps,
|
||||
HwPerfCounter *hwPerfCounter,
|
||||
LinearStream *commandStream,
|
||||
CommandQueue &commandQueue) {
|
||||
|
@ -77,7 +77,7 @@ inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfStartCommands(
|
|||
|
||||
template <typename GfxFamily>
|
||||
inline void HardwareInterface<GfxFamily>::dispatchProfilingPerfEndCommands(
|
||||
TagNode<HwTimeStamps> *hwTimeStamps,
|
||||
HwTimeStamps *hwTimeStamps,
|
||||
HwPerfCounter *hwPerfCounter,
|
||||
LinearStream *commandStream,
|
||||
CommandQueue &commandQueue) {
|
||||
|
|
|
@ -143,7 +143,7 @@ void DeviceQueue::initDeviceQueue() {
|
|||
igilEventPool->m_size = caps.maxOnDeviceEvents;
|
||||
}
|
||||
|
||||
void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp) {
|
||||
void DeviceQueue::setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp) {
|
||||
setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentCount);
|
||||
addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
|
||||
}
|
||||
|
@ -152,7 +152,7 @@ void DeviceQueue::setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHea
|
|||
return;
|
||||
}
|
||||
|
||||
void DeviceQueue::addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) {
|
||||
void DeviceQueue::addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -22,8 +22,6 @@ class Event;
|
|||
struct MultiDispatchInfo;
|
||||
class SchedulerKernel;
|
||||
struct HwTimeStamps;
|
||||
template <class T>
|
||||
struct TagNode;
|
||||
|
||||
template <>
|
||||
struct OpenCLObjectMapper<_device_queue> {
|
||||
|
@ -68,10 +66,10 @@ class DeviceQueue : public BaseObject<_device_queue> {
|
|||
size_t paramValueSize, void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, TagNode<HwTimeStamps> *hwTimeStamp);
|
||||
void setupExecutionModelDispatch(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentCount, uint32_t taskCount, HwTimeStamps *hwTimeStamp);
|
||||
|
||||
virtual void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount);
|
||||
virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount);
|
||||
virtual void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount);
|
||||
|
||||
MOCKABLE_VIRTUAL bool isEMCriticalSectionFree() {
|
||||
auto igilCmdQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
|
||||
|
|
|
@ -55,7 +55,7 @@ class DeviceQueueHw : public DeviceQueue {
|
|||
|
||||
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount) override;
|
||||
|
||||
void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) override;
|
||||
void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override;
|
||||
void resetDeviceQueue() override;
|
||||
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override;
|
||||
|
||||
|
|
|
@ -12,7 +12,6 @@
|
|||
#include "runtime/helpers/preamble.h"
|
||||
#include "runtime/helpers/string.h"
|
||||
#include "runtime/memory_manager/memory_manager.h"
|
||||
#include "runtime/utilities/tag_allocator.h"
|
||||
|
||||
namespace OCLRT {
|
||||
template <typename GfxFamily>
|
||||
|
@ -202,7 +201,7 @@ void DeviceQueueHw<GfxFamily>::buildSlbDummyCommands() {
|
|||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) {
|
||||
void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) {
|
||||
// CleanUp Section
|
||||
auto offset = slbCS.getUsed();
|
||||
auto alignmentSize = alignUp(offset, MemoryConstants::pageSize) - offset;
|
||||
|
@ -216,7 +215,7 @@ void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKer
|
|||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
|
||||
if (hwTimeStamp != nullptr) {
|
||||
uint64_t TimeStampAddress = hwTimeStamp->getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamp->tag->ContextCompleteTS, hwTimeStamp->tag);
|
||||
uint64_t TimeStampAddress = (uint64_t)((uintptr_t) & (hwTimeStamp->ContextCompleteTS));
|
||||
igilQueue->m_controls.m_EventTimestampAddress = TimeStampAddress;
|
||||
|
||||
addProfilingEndCmds(TimeStampAddress);
|
||||
|
|
|
@ -451,7 +451,7 @@ void Event::submitCommand(bool abortTasks) {
|
|||
if ((this->isProfilingEnabled()) && (this->cmdQueue != nullptr)) {
|
||||
if (timeStampNode) {
|
||||
this->cmdQueue->getCommandStreamReceiver().makeResident(*timeStampNode->getGraphicsAllocation());
|
||||
cmdToProcess->timestamp = timeStampNode;
|
||||
cmdToProcess->timestamp = timeStampNode->tag;
|
||||
}
|
||||
if (profilingCpuPath) {
|
||||
setSubmitTimeStamp();
|
||||
|
|
|
@ -26,8 +26,6 @@ class Surface;
|
|||
class PrintfHandler;
|
||||
struct HwTimeStamps;
|
||||
class TimestampPacketContainer;
|
||||
template <class T>
|
||||
struct TagNode;
|
||||
|
||||
enum MapOperationType {
|
||||
MAP,
|
||||
|
@ -44,7 +42,7 @@ class Command : public IFNode<Command> {
|
|||
virtual LinearStream *getCommandStream() {
|
||||
return nullptr;
|
||||
}
|
||||
TagNode<HwTimeStamps> *timestamp = nullptr;
|
||||
HwTimeStamps *timestamp = nullptr;
|
||||
CompletionStamp completionStamp = {};
|
||||
};
|
||||
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
|
||||
#include "hw_cmds.h"
|
||||
#include "runtime/helpers/options.h"
|
||||
#include "runtime/utilities/tag_allocator.h"
|
||||
#include "unit_tests/fixtures/device_host_queue_fixture.h"
|
||||
#include "unit_tests/fixtures/execution_model_fixture.h"
|
||||
#include "unit_tests/helpers/hw_parse.h"
|
||||
|
@ -353,19 +352,18 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceQueueSlb, AddEMCleanupSectionWithProfiling) {
|
|||
MockParentKernel *mockParentKernel = MockParentKernel::create(*pContext);
|
||||
uint32_t taskCount = 7;
|
||||
|
||||
auto hwTimeStamp = pCommandQueue->getCommandStreamReceiver().getEventTsAllocator()->getTag();
|
||||
HwTimeStamps hwTimeStamp;
|
||||
mockDeviceQueueHw->buildSlbDummyCommands();
|
||||
mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel, hwTimeStamp, taskCount);
|
||||
mockDeviceQueueHw->addExecutionModelCleanUpSection(mockParentKernel, &hwTimeStamp, taskCount);
|
||||
|
||||
uint32_t eventTimestampAddrLow = static_cast<uint32_t>(igilCmdQueue->m_controls.m_EventTimestampAddress & 0xFFFFFFFF);
|
||||
uint32_t eventTimestampAddrHigh = static_cast<uint32_t>((igilCmdQueue->m_controls.m_EventTimestampAddress & 0xFFFFFFFF00000000) >> 32);
|
||||
uint32_t eventTimestampLow = (uint32_t)(igilCmdQueue->m_controls.m_EventTimestampAddress & 0xFFFFFFFF);
|
||||
uint32_t eventTimestampHigh = (uint32_t)((igilCmdQueue->m_controls.m_EventTimestampAddress & 0xFFFFFFFF00000000) >> 32);
|
||||
|
||||
uint64_t contextCompleteAddr = hwTimeStamp->getGraphicsAllocation()->getGpuAddress() + ptrDiff(&hwTimeStamp->tag->ContextCompleteTS, hwTimeStamp->tag);
|
||||
uint32_t contextCompleteAddrLow = static_cast<uint32_t>(contextCompleteAddr & 0xFFFFFFFF);
|
||||
uint32_t contextCompleteAddrHigh = static_cast<uint32_t>((contextCompleteAddr & 0xFFFFFFFF00000000) >> 32);
|
||||
uint32_t contextCompleteLow = (uint32_t)((uint64_t)((uintptr_t)(&hwTimeStamp.ContextCompleteTS)) & 0xFFFFFFFF);
|
||||
uint32_t contextCompleteHigh = (uint32_t)(((uint64_t)((uintptr_t)(&hwTimeStamp.ContextCompleteTS)) & 0xFFFFFFFF00000000) >> 32);
|
||||
|
||||
EXPECT_EQ(contextCompleteAddrLow, eventTimestampAddrLow);
|
||||
EXPECT_EQ(contextCompleteAddrHigh, eventTimestampAddrHigh);
|
||||
EXPECT_EQ(contextCompleteLow, eventTimestampLow);
|
||||
EXPECT_EQ(contextCompleteHigh, eventTimestampHigh);
|
||||
|
||||
HardwareParse hwParser;
|
||||
auto *slbCS = mockDeviceQueueHw->getSlbCS();
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
#include "runtime/command_queue/gpgpu_walker.h"
|
||||
#include "runtime/command_queue/hardware_interface.h"
|
||||
#include "runtime/event/hw_timestamps.h"
|
||||
#include "runtime/utilities/tag_allocator.h"
|
||||
#include "runtime/helpers/kernel_commands.h"
|
||||
#include "runtime/helpers/task_information.h"
|
||||
#include "unit_tests/mocks/mock_command_queue.h"
|
||||
|
@ -57,9 +56,9 @@ class MockDeviceQueueHwWithCriticalSectionRelease : public DeviceQueueHw<GfxFami
|
|||
indirectStateSetup = true;
|
||||
return BaseClass::setupIndirectState(surfaceStateHeap, dynamicStateHeap, parentKernel, parentIDCount);
|
||||
}
|
||||
void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNode<HwTimeStamps> *hwTimeStamp, uint32_t taskCount) override {
|
||||
void addExecutionModelCleanUpSection(Kernel *parentKernel, HwTimeStamps *hwTimeStamp, uint32_t taskCount) override {
|
||||
cleanupSectionAdded = true;
|
||||
timestampAddedInCleanupSection = hwTimeStamp ? hwTimeStamp->tag : nullptr;
|
||||
timestampAddedInCleanupSection = hwTimeStamp;
|
||||
return BaseClass::addExecutionModelCleanUpSection(parentKernel, hwTimeStamp, taskCount);
|
||||
}
|
||||
void dispatchScheduler(CommandQueue &cmdQ, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh) override {
|
||||
|
@ -250,12 +249,13 @@ HWTEST_F(ParentKernelCommandQueueFixture, givenBlockedParentKernelWithProfilingW
|
|||
std::vector<Surface *> surfaces;
|
||||
auto *cmdComputeKernel = new CommandComputeKernel(*pCmdQ, std::unique_ptr<KernelOperation>(blockedCommandData), surfaces, false, false, false, nullptr, preemptionMode, parentKernel, 1);
|
||||
|
||||
auto timestamp = pCmdQ->getCommandStreamReceiver().getEventTsAllocator()->getTag();
|
||||
cmdComputeKernel->timestamp = timestamp;
|
||||
HwTimeStamps timestamp;
|
||||
|
||||
cmdComputeKernel->timestamp = &timestamp;
|
||||
cmdComputeKernel->submit(0, false);
|
||||
|
||||
EXPECT_TRUE(mockDevQueue.cleanupSectionAdded);
|
||||
EXPECT_EQ(mockDevQueue.timestampAddedInCleanupSection, timestamp->tag);
|
||||
EXPECT_EQ(mockDevQueue.timestampAddedInCleanupSection, &timestamp);
|
||||
|
||||
delete cmdComputeKernel;
|
||||
delete parentKernel;
|
||||
|
|
Loading…
Reference in New Issue