From 8cabedfe3d766f267714b112f295a63a8eada070 Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Fri, 6 Mar 2020 16:04:47 -0800 Subject: [PATCH] Fix global start timestamp to provide full 36 bit value While at it, resize events to 16 bytes since it's sufficient to work across HW families. Change-Id: I3459926373468246991c141bd96288dba834695b Signed-off-by: Aravind Gopalakrishnan --- level_zero/core/source/cmdlist_hw.inl | 5 +- level_zero/core/source/cmdlist_hw_base.inl | 13 ++--- level_zero/core/source/event.cpp | 61 +++++++++++++++------- level_zero/core/source/event.h | 16 ++++-- 4 files changed, 64 insertions(+), 31 deletions(-) diff --git a/level_zero/core/source/cmdlist_hw.inl b/level_zero/core/source/cmdlist_hw.inl index 31c8a0bca9..3bf7112dc0 100644 --- a/level_zero/core/source/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist_hw.inl @@ -1106,6 +1106,7 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; uint64_t gpuAddr = 0; + constexpr uint32_t eventStateClear = static_cast(-1); for (uint32_t i = 0; i < numEvents; i++) { auto event = Event::fromHandle(phEvent[i]); @@ -1113,12 +1114,12 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu gpuAddr = event->getGpuAddress(); if (event->isTimestampEvent) { - gpuAddr += event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_CONTEXT_END); + gpuAddr += event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END); } NEO::HardwareCommandsHelper::programMiSemaphoreWait(*(commandContainer.getCommandStream()), gpuAddr, - Event::STATE_CLEARED, + eventStateClear, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); bool dcFlushEnable = (event->waitScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; diff --git a/level_zero/core/source/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist_hw_base.inl index 20ab2701e3..eed6e70527 100644 --- a/level_zero/core/source/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist_hw_base.inl @@ -90,19 +90,20 @@ void CommandListCoreFamily::appendEventForProfiling(ze_event_hand commandContainer.addToResidencyContainer(&event->getAllocation()); if (beforeWalker) { - timeStampAddress = event->getGpuAddress() + event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_GLOBAL_START); + timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_START_LOW); NEO::EncodeStoreMMIO::encode(commandContainer, REG_GLOBAL_TIMESTAMP_LDW, timeStampAddress); - timeStampAddress = event->getGpuAddress() + event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_CONTEXT_START); + timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_START_HIGH); + NEO::EncodeStoreMMIO::encode(commandContainer, REG_GLOBAL_TIMESTAMP_UN, timeStampAddress); + + timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_START); NEO::EncodeStoreMMIO::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress); } else { - // Local Context End SRM - timeStampAddress = event->getGpuAddress() + event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_CONTEXT_END); + timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END); NEO::EncodeStoreMMIO::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress); - // Global End PC - timeStampAddress = event->getGpuAddress() + event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_GLOBAL_END); + timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_END); bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( diff --git a/level_zero/core/source/event.cpp b/level_zero/core/source/event.cpp index 08faa04f8e..53f8d15a04 100644 --- a/level_zero/core/source/event.cpp +++ b/level_zero/core/source/event.cpp @@ -52,7 +52,7 @@ struct EventImp : public Event { if (isTimestampEvent) { auto baseAddr = reinterpret_cast(hostAddress); - auto timeStampAddress = baseAddr + getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_CONTEXT_END); + auto timeStampAddress = baseAddr + getOffsetOfEventTimestampRegister(Event::CONTEXT_END); hostAddr = reinterpret_cast(timeStampAddress); } @@ -67,8 +67,8 @@ struct EventImp : public Event { EventPool *eventPool; protected: - ze_result_t hostEventSetValue(uint32_t eventValue); - ze_result_t hostEventSetValueTimestamps(uint32_t eventVal); + ze_result_t hostEventSetValue(uint64_t eventValue); + ze_result_t hostEventSetValueTimestamps(uint64_t eventVal); void makeAllocationResident(); }; @@ -83,12 +83,12 @@ struct EventPoolImp : public EventPool { auto timestampMultiplier = 1; if (flags == ZE_EVENT_POOL_FLAG_TIMESTAMP) { isEventPoolUsedForTimestamp = true; - timestampMultiplier = numEventTimestampTypes; + timestampMultiplier = numEventTimestampsToRead; } NEO::AllocationProperties properties( device->getRootDeviceIndex(), count * eventSize * timestampMultiplier, NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY); - properties.alignment = eventAlignment; + properties.alignment = MemoryConstants::cacheLineSize; eventPoolAllocation = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); UNRECOVERABLE_IF(eventPoolAllocation == nullptr); @@ -130,7 +130,7 @@ struct EventPoolImp : public EventPool { uint32_t getEventSize() override { return eventSize; } - uint32_t getNumEventTimestampTypes() override { return numEventTimestampTypes; } + uint32_t getNumEventTimestampsToRead() override { return numEventTimestampsToRead; } ze_result_t destroyPool() { if (eventPoolUsedCount != 0) { @@ -152,10 +152,9 @@ struct EventPoolImp : public EventPool { std::queue lastEventPoolOffsetUsed; protected: - const uint32_t eventSize = 64u; + const uint32_t eventSize = 16u; const uint32_t eventAlignment = MemoryConstants::cacheLineSize; - - const uint32_t numEventTimestampTypes = 4u; + const int32_t numEventTimestampsToRead = 5u; }; Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device) { @@ -181,10 +180,10 @@ NEO::GraphicsAllocation &Event::getAllocation() { return eventImp->eventPool->getAllocation(); } -uint64_t Event::getOffsetOfProfilingEvent(uint32_t profileEventType) { +uint64_t Event::getOffsetOfEventTimestampRegister(uint32_t eventTimestampReg) { auto eventImp = static_cast(this); auto eventSize = eventImp->eventPool->getEventSize(); - return (profileEventType * eventSize); + return (eventTimestampReg * eventSize); } ze_result_t Event::destroy() { @@ -207,10 +206,10 @@ void EventImp::makeAllocationResident() { } } -ze_result_t EventImp::hostEventSetValueTimestamps(uint32_t eventVal) { - for (uint32_t i = 0; i < this->eventPool->getNumEventTimestampTypes(); i++) { +ze_result_t EventImp::hostEventSetValueTimestamps(uint64_t eventVal) { + for (uint32_t i = 0; i < this->eventPool->getNumEventTimestampsToRead(); i++) { auto baseAddr = reinterpret_cast(hostAddress); - auto timeStampAddress = baseAddr + getOffsetOfProfilingEvent(i); + auto timeStampAddress = baseAddr + getOffsetOfEventTimestampRegister(i); auto tsptr = reinterpret_cast(timeStampAddress); *(tsptr) = eventVal; @@ -225,7 +224,7 @@ ze_result_t EventImp::hostEventSetValueTimestamps(uint32_t eventVal) { return ZE_RESULT_SUCCESS; } -ze_result_t EventImp::hostEventSetValue(uint32_t eventVal) { +ze_result_t EventImp::hostEventSetValue(uint64_t eventVal) { if (isTimestampEvent) { hostEventSetValueTimestamps(eventVal); } @@ -292,14 +291,38 @@ ze_result_t EventImp::reset() { ze_result_t EventImp::getTimestamp(ze_event_timestamp_type_t timestampType, void *dstptr) { auto baseAddr = reinterpret_cast(hostAddress); + uint64_t *tsptr = nullptr; + uint64_t tsData = Event::STATE_INITIAL; + constexpr uint64_t tsMask = (1ull << 32) - 1; if (!this->isTimestampEvent) return ZE_RESULT_ERROR_INVALID_ARGUMENT; - auto timeStampAddress = baseAddr + getOffsetOfProfilingEvent(timestampType); - auto tsptr = reinterpret_cast(timeStampAddress); + // Ensure timestamps have been written + if (queryStatus() != ZE_RESULT_SUCCESS) { + memcpy_s(dstptr, sizeof(uint64_t), static_cast(&tsData), sizeof(uint64_t)); + return ZE_RESULT_SUCCESS; + } - memcpy_s(dstptr, sizeof(uint64_t), static_cast(tsptr), sizeof(uint64_t)); + if (timestampType == ZE_EVENT_TIMESTAMP_GLOBAL_START) { + tsptr = reinterpret_cast(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_START_LOW)); + auto tsptrUpper = reinterpret_cast(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_START_HIGH)); + + tsData = ((*tsptrUpper & tsMask) << 32) | (*tsptr & tsMask); + memcpy_s(dstptr, sizeof(uint64_t), static_cast(&tsData), sizeof(uint64_t)); + return ZE_RESULT_SUCCESS; + } + + if (timestampType == ZE_EVENT_TIMESTAMP_GLOBAL_END) { + tsptr = reinterpret_cast(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_END)); + } else if (timestampType == ZE_EVENT_TIMESTAMP_CONTEXT_START) { + tsptr = reinterpret_cast(baseAddr + getOffsetOfEventTimestampRegister(Event::CONTEXT_START)); + } else { + tsptr = reinterpret_cast(baseAddr + getOffsetOfEventTimestampRegister(Event::CONTEXT_END)); + } + + tsData = (*tsptr & tsMask); + memcpy_s(dstptr, sizeof(uint64_t), static_cast(&tsData), sizeof(uint64_t)); return ZE_RESULT_SUCCESS; } @@ -328,7 +351,7 @@ ze_result_t EventPoolImp::reserveEventFromPool(int index, Event *event) { auto timestampMultiplier = 1; if (static_cast(this)->isEventPoolUsedForTimestamp) { - timestampMultiplier = numEventTimestampTypes; + timestampMultiplier = numEventTimestampsToRead; } uint64_t baseHostAddr = reinterpret_cast(eventPoolAllocation->getUnderlyingBuffer()); diff --git a/level_zero/core/source/event.h b/level_zero/core/source/event.h index 60f677252e..1c73c4f088 100644 --- a/level_zero/core/source/event.h +++ b/level_zero/core/source/event.h @@ -30,12 +30,20 @@ struct Event : _ze_event_handle_t { virtual ze_result_t reset() = 0; virtual ze_result_t getTimestamp(ze_event_timestamp_type_t timestampType, void *dstptr) = 0; - enum State : uint32_t { + enum State : uint64_t { STATE_SIGNALED = 0u, - STATE_CLEARED = static_cast(-1), + STATE_CLEARED = static_cast(-1), STATE_INITIAL = STATE_CLEARED }; + enum EventTimestampRegister : uint32_t { + GLOBAL_START_LOW = 0u, + GLOBAL_START_HIGH, + GLOBAL_END, + CONTEXT_START, + CONTEXT_END + }; + static Event *create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device); static Event *fromHandle(ze_event_handle_t handle) { return static_cast(handle); } @@ -45,7 +53,7 @@ struct Event : _ze_event_handle_t { NEO::GraphicsAllocation &getAllocation(); uint64_t getGpuAddress() { return gpuAddress; } - uint64_t getOffsetOfProfilingEvent(uint32_t profileEventType); + uint64_t getOffsetOfEventTimestampRegister(uint32_t eventTimestampReg); void *hostAddress = nullptr; uint64_t gpuAddress; @@ -92,7 +100,7 @@ struct EventPool : _ze_event_pool_handle_t { NEO::GraphicsAllocation &getAllocation() { return *eventPoolAllocation; } virtual uint32_t getEventSize() = 0; - virtual uint32_t getNumEventTimestampTypes() = 0; + virtual uint32_t getNumEventTimestampsToRead() = 0; bool isEventPoolUsedForTimestamp = false;