mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
Fix global start timestamp to provide the full 36-bit value
While at it, resize events to 16 bytes, since that is sufficient to work across HW families.
Change-Id: I3459926373468246991c141bd96288dba834695b
Signed-off-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
ca08896884
commit
8cabedfe3d
@@ -1106,6 +1106,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
uint64_t gpuAddr = 0;
|
||||
constexpr uint32_t eventStateClear = static_cast<uint32_t>(-1);
|
||||
|
||||
for (uint32_t i = 0; i < numEvents; i++) {
|
||||
auto event = Event::fromHandle(phEvent[i]);
|
||||
@@ -1113,12 +1114,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
|
||||
gpuAddr = event->getGpuAddress();
|
||||
if (event->isTimestampEvent) {
|
||||
gpuAddr += event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_CONTEXT_END);
|
||||
gpuAddr += event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END);
|
||||
}
|
||||
|
||||
NEO::HardwareCommandsHelper<GfxFamily>::programMiSemaphoreWait(*(commandContainer.getCommandStream()),
|
||||
gpuAddr,
|
||||
Event::STATE_CLEARED,
|
||||
eventStateClear,
|
||||
COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
|
||||
|
||||
bool dcFlushEnable = (event->waitScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
|
||||
|
||||
@@ -90,19 +90,20 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_hand
|
||||
|
||||
commandContainer.addToResidencyContainer(&event->getAllocation());
|
||||
if (beforeWalker) {
|
||||
timeStampAddress = event->getGpuAddress() + event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_GLOBAL_START);
|
||||
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_START_LOW);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, REG_GLOBAL_TIMESTAMP_LDW, timeStampAddress);
|
||||
|
||||
timeStampAddress = event->getGpuAddress() + event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_CONTEXT_START);
|
||||
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_START_HIGH);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, REG_GLOBAL_TIMESTAMP_UN, timeStampAddress);
|
||||
|
||||
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_START);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress);
|
||||
} else {
|
||||
|
||||
// Local Context End SRM
|
||||
timeStampAddress = event->getGpuAddress() + event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_CONTEXT_END);
|
||||
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress);
|
||||
|
||||
// Global End PC
|
||||
timeStampAddress = event->getGpuAddress() + event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_GLOBAL_END);
|
||||
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_END);
|
||||
bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
|
||||
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
|
||||
|
||||
@@ -52,7 +52,7 @@ struct EventImp : public Event {
|
||||
if (isTimestampEvent) {
|
||||
auto baseAddr = reinterpret_cast<uint64_t>(hostAddress);
|
||||
|
||||
auto timeStampAddress = baseAddr + getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_CONTEXT_END);
|
||||
auto timeStampAddress = baseAddr + getOffsetOfEventTimestampRegister(Event::CONTEXT_END);
|
||||
hostAddr = reinterpret_cast<uint64_t *>(timeStampAddress);
|
||||
}
|
||||
|
||||
@@ -67,8 +67,8 @@ struct EventImp : public Event {
|
||||
EventPool *eventPool;
|
||||
|
||||
protected:
|
||||
ze_result_t hostEventSetValue(uint32_t eventValue);
|
||||
ze_result_t hostEventSetValueTimestamps(uint32_t eventVal);
|
||||
ze_result_t hostEventSetValue(uint64_t eventValue);
|
||||
ze_result_t hostEventSetValueTimestamps(uint64_t eventVal);
|
||||
void makeAllocationResident();
|
||||
};
|
||||
|
||||
@@ -83,12 +83,12 @@ struct EventPoolImp : public EventPool {
|
||||
auto timestampMultiplier = 1;
|
||||
if (flags == ZE_EVENT_POOL_FLAG_TIMESTAMP) {
|
||||
isEventPoolUsedForTimestamp = true;
|
||||
timestampMultiplier = numEventTimestampTypes;
|
||||
timestampMultiplier = numEventTimestampsToRead;
|
||||
}
|
||||
|
||||
NEO::AllocationProperties properties(
|
||||
device->getRootDeviceIndex(), count * eventSize * timestampMultiplier, NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);
|
||||
properties.alignment = eventAlignment;
|
||||
properties.alignment = MemoryConstants::cacheLineSize;
|
||||
eventPoolAllocation = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
|
||||
|
||||
UNRECOVERABLE_IF(eventPoolAllocation == nullptr);
|
||||
@@ -130,7 +130,7 @@ struct EventPoolImp : public EventPool {
|
||||
|
||||
uint32_t getEventSize() override { return eventSize; }
|
||||
|
||||
uint32_t getNumEventTimestampTypes() override { return numEventTimestampTypes; }
|
||||
uint32_t getNumEventTimestampsToRead() override { return numEventTimestampsToRead; }
|
||||
|
||||
ze_result_t destroyPool() {
|
||||
if (eventPoolUsedCount != 0) {
|
||||
@@ -152,10 +152,9 @@ struct EventPoolImp : public EventPool {
|
||||
std::queue<int> lastEventPoolOffsetUsed;
|
||||
|
||||
protected:
|
||||
const uint32_t eventSize = 64u;
|
||||
const uint32_t eventSize = 16u;
|
||||
const uint32_t eventAlignment = MemoryConstants::cacheLineSize;
|
||||
|
||||
const uint32_t numEventTimestampTypes = 4u;
|
||||
const int32_t numEventTimestampsToRead = 5u;
|
||||
};
|
||||
|
||||
Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device) {
|
||||
@@ -181,10 +180,10 @@ NEO::GraphicsAllocation &Event::getAllocation() {
|
||||
return eventImp->eventPool->getAllocation();
|
||||
}
|
||||
|
||||
uint64_t Event::getOffsetOfProfilingEvent(uint32_t profileEventType) {
|
||||
uint64_t Event::getOffsetOfEventTimestampRegister(uint32_t eventTimestampReg) {
|
||||
auto eventImp = static_cast<EventImp *>(this);
|
||||
auto eventSize = eventImp->eventPool->getEventSize();
|
||||
return (profileEventType * eventSize);
|
||||
return (eventTimestampReg * eventSize);
|
||||
}
|
||||
|
||||
ze_result_t Event::destroy() {
|
||||
@@ -207,10 +206,10 @@ void EventImp::makeAllocationResident() {
|
||||
}
|
||||
}
|
||||
|
||||
ze_result_t EventImp::hostEventSetValueTimestamps(uint32_t eventVal) {
|
||||
for (uint32_t i = 0; i < this->eventPool->getNumEventTimestampTypes(); i++) {
|
||||
ze_result_t EventImp::hostEventSetValueTimestamps(uint64_t eventVal) {
|
||||
for (uint32_t i = 0; i < this->eventPool->getNumEventTimestampsToRead(); i++) {
|
||||
auto baseAddr = reinterpret_cast<uint64_t>(hostAddress);
|
||||
auto timeStampAddress = baseAddr + getOffsetOfProfilingEvent(i);
|
||||
auto timeStampAddress = baseAddr + getOffsetOfEventTimestampRegister(i);
|
||||
auto tsptr = reinterpret_cast<uint64_t *>(timeStampAddress);
|
||||
|
||||
*(tsptr) = eventVal;
|
||||
@@ -225,7 +224,7 @@ ze_result_t EventImp::hostEventSetValueTimestamps(uint32_t eventVal) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ze_result_t EventImp::hostEventSetValue(uint32_t eventVal) {
|
||||
ze_result_t EventImp::hostEventSetValue(uint64_t eventVal) {
|
||||
if (isTimestampEvent) {
|
||||
hostEventSetValueTimestamps(eventVal);
|
||||
}
|
||||
@@ -292,14 +291,38 @@ ze_result_t EventImp::reset() {
|
||||
|
||||
ze_result_t EventImp::getTimestamp(ze_event_timestamp_type_t timestampType, void *dstptr) {
|
||||
auto baseAddr = reinterpret_cast<uint64_t>(hostAddress);
|
||||
uint64_t *tsptr = nullptr;
|
||||
uint64_t tsData = Event::STATE_INITIAL;
|
||||
constexpr uint64_t tsMask = (1ull << 32) - 1;
|
||||
|
||||
if (!this->isTimestampEvent)
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
auto timeStampAddress = baseAddr + getOffsetOfProfilingEvent(timestampType);
|
||||
auto tsptr = reinterpret_cast<uint64_t *>(timeStampAddress);
|
||||
// Ensure timestamps have been written
|
||||
if (queryStatus() != ZE_RESULT_SUCCESS) {
|
||||
memcpy_s(dstptr, sizeof(uint64_t), static_cast<void *>(&tsData), sizeof(uint64_t));
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
memcpy_s(dstptr, sizeof(uint64_t), static_cast<void *>(tsptr), sizeof(uint64_t));
|
||||
if (timestampType == ZE_EVENT_TIMESTAMP_GLOBAL_START) {
|
||||
tsptr = reinterpret_cast<uint64_t *>(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_START_LOW));
|
||||
auto tsptrUpper = reinterpret_cast<uint64_t *>(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_START_HIGH));
|
||||
|
||||
tsData = ((*tsptrUpper & tsMask) << 32) | (*tsptr & tsMask);
|
||||
memcpy_s(dstptr, sizeof(uint64_t), static_cast<void *>(&tsData), sizeof(uint64_t));
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
if (timestampType == ZE_EVENT_TIMESTAMP_GLOBAL_END) {
|
||||
tsptr = reinterpret_cast<uint64_t *>(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_END));
|
||||
} else if (timestampType == ZE_EVENT_TIMESTAMP_CONTEXT_START) {
|
||||
tsptr = reinterpret_cast<uint64_t *>(baseAddr + getOffsetOfEventTimestampRegister(Event::CONTEXT_START));
|
||||
} else {
|
||||
tsptr = reinterpret_cast<uint64_t *>(baseAddr + getOffsetOfEventTimestampRegister(Event::CONTEXT_END));
|
||||
}
|
||||
|
||||
tsData = (*tsptr & tsMask);
|
||||
memcpy_s(dstptr, sizeof(uint64_t), static_cast<void *>(&tsData), sizeof(uint64_t));
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
@@ -328,7 +351,7 @@ ze_result_t EventPoolImp::reserveEventFromPool(int index, Event *event) {
|
||||
|
||||
auto timestampMultiplier = 1;
|
||||
if (static_cast<struct EventPool *>(this)->isEventPoolUsedForTimestamp) {
|
||||
timestampMultiplier = numEventTimestampTypes;
|
||||
timestampMultiplier = numEventTimestampsToRead;
|
||||
}
|
||||
|
||||
uint64_t baseHostAddr = reinterpret_cast<uint64_t>(eventPoolAllocation->getUnderlyingBuffer());
|
||||
|
||||
@@ -30,12 +30,20 @@ struct Event : _ze_event_handle_t {
|
||||
virtual ze_result_t reset() = 0;
|
||||
virtual ze_result_t getTimestamp(ze_event_timestamp_type_t timestampType, void *dstptr) = 0;
|
||||
|
||||
enum State : uint32_t {
|
||||
enum State : uint64_t {
|
||||
STATE_SIGNALED = 0u,
|
||||
STATE_CLEARED = static_cast<uint32_t>(-1),
|
||||
STATE_CLEARED = static_cast<uint64_t>(-1),
|
||||
STATE_INITIAL = STATE_CLEARED
|
||||
};
|
||||
|
||||
// Index of each timestamp slot kept for a timestamp event.
// getOffsetOfEventTimestampRegister() multiplies the index by the pool's
// event size to obtain the slot's byte offset from the event's base address.
enum EventTimestampRegister : uint32_t {
|
||||
// Receives REG_GLOBAL_TIMESTAMP_LDW before the walker; getTimestamp() uses it
// as the low 32 bits of the global start timestamp.
GLOBAL_START_LOW = 0u,
|
||||
// Receives REG_GLOBAL_TIMESTAMP_UN before the walker; getTimestamp() shifts it
// left by 32 and ORs it with GLOBAL_START_LOW.
GLOBAL_START_HIGH,
|
||||
// Slot addressed for the global end timestamp after the walker.
// NOTE(review): appears to be filled by the pipe-control post-sync operation
// programmed in appendEventForProfiling — confirm against the full call.
GLOBAL_END,
|
||||
// Receives GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW before the walker.
CONTEXT_START,
|
||||
// Receives GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW after the walker. For timestamp
// events this is also the slot appendWaitOnEvents() semaphore-waits on and the
// slot the host-side address is pointed at.
CONTEXT_END
|
||||
};
|
||||
|
||||
static Event *create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device);
|
||||
|
||||
static Event *fromHandle(ze_event_handle_t handle) { return static_cast<Event *>(handle); }
|
||||
@@ -45,7 +53,7 @@ struct Event : _ze_event_handle_t {
|
||||
NEO::GraphicsAllocation &getAllocation();
|
||||
|
||||
uint64_t getGpuAddress() { return gpuAddress; }
|
||||
uint64_t getOffsetOfProfilingEvent(uint32_t profileEventType);
|
||||
uint64_t getOffsetOfEventTimestampRegister(uint32_t eventTimestampReg);
|
||||
|
||||
void *hostAddress = nullptr;
|
||||
uint64_t gpuAddress;
|
||||
@@ -92,7 +100,7 @@ struct EventPool : _ze_event_pool_handle_t {
|
||||
NEO::GraphicsAllocation &getAllocation() { return *eventPoolAllocation; }
|
||||
|
||||
virtual uint32_t getEventSize() = 0;
|
||||
virtual uint32_t getNumEventTimestampTypes() = 0;
|
||||
virtual uint32_t getNumEventTimestampsToRead() = 0;
|
||||
|
||||
bool isEventPoolUsedForTimestamp = false;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user