Fix global start timestamp to provide the full 36-bit value

While at it, resize events to 16 bytes since it's sufficient
to work across HW families.

Change-Id: I3459926373468246991c141bd96288dba834695b
Signed-off-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@intel.com>
This commit is contained in:
Aravind Gopalakrishnan
2020-03-06 16:04:47 -08:00
committed by sys_ocldev
parent ca08896884
commit 8cabedfe3d
4 changed files with 64 additions and 31 deletions

View File

@@ -1106,6 +1106,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
uint64_t gpuAddr = 0;
constexpr uint32_t eventStateClear = static_cast<uint32_t>(-1);
for (uint32_t i = 0; i < numEvents; i++) {
auto event = Event::fromHandle(phEvent[i]);
@@ -1113,12 +1114,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
gpuAddr = event->getGpuAddress();
if (event->isTimestampEvent) {
gpuAddr += event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_CONTEXT_END);
gpuAddr += event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END);
}
NEO::HardwareCommandsHelper<GfxFamily>::programMiSemaphoreWait(*(commandContainer.getCommandStream()),
gpuAddr,
Event::STATE_CLEARED,
eventStateClear,
COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
bool dcFlushEnable = (event->waitScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;

View File

@@ -90,19 +90,20 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_hand
commandContainer.addToResidencyContainer(&event->getAllocation());
if (beforeWalker) {
timeStampAddress = event->getGpuAddress() + event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_GLOBAL_START);
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_START_LOW);
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, REG_GLOBAL_TIMESTAMP_LDW, timeStampAddress);
timeStampAddress = event->getGpuAddress() + event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_CONTEXT_START);
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_START_HIGH);
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, REG_GLOBAL_TIMESTAMP_UN, timeStampAddress);
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_START);
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress);
} else {
// Local Context End SRM
timeStampAddress = event->getGpuAddress() + event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_CONTEXT_END);
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END);
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress);
// Global End PC
timeStampAddress = event->getGpuAddress() + event->getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_GLOBAL_END);
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_END);
bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(

View File

@@ -52,7 +52,7 @@ struct EventImp : public Event {
if (isTimestampEvent) {
auto baseAddr = reinterpret_cast<uint64_t>(hostAddress);
auto timeStampAddress = baseAddr + getOffsetOfProfilingEvent(ZE_EVENT_TIMESTAMP_CONTEXT_END);
auto timeStampAddress = baseAddr + getOffsetOfEventTimestampRegister(Event::CONTEXT_END);
hostAddr = reinterpret_cast<uint64_t *>(timeStampAddress);
}
@@ -67,8 +67,8 @@ struct EventImp : public Event {
EventPool *eventPool;
protected:
ze_result_t hostEventSetValue(uint32_t eventValue);
ze_result_t hostEventSetValueTimestamps(uint32_t eventVal);
ze_result_t hostEventSetValue(uint64_t eventValue);
ze_result_t hostEventSetValueTimestamps(uint64_t eventVal);
void makeAllocationResident();
};
@@ -83,12 +83,12 @@ struct EventPoolImp : public EventPool {
auto timestampMultiplier = 1;
if (flags == ZE_EVENT_POOL_FLAG_TIMESTAMP) {
isEventPoolUsedForTimestamp = true;
timestampMultiplier = numEventTimestampTypes;
timestampMultiplier = numEventTimestampsToRead;
}
NEO::AllocationProperties properties(
device->getRootDeviceIndex(), count * eventSize * timestampMultiplier, NEO::GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);
properties.alignment = eventAlignment;
properties.alignment = MemoryConstants::cacheLineSize;
eventPoolAllocation = device->getDriverHandle()->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
UNRECOVERABLE_IF(eventPoolAllocation == nullptr);
@@ -130,7 +130,7 @@ struct EventPoolImp : public EventPool {
uint32_t getEventSize() override { return eventSize; }
uint32_t getNumEventTimestampTypes() override { return numEventTimestampTypes; }
uint32_t getNumEventTimestampsToRead() override { return numEventTimestampsToRead; }
ze_result_t destroyPool() {
if (eventPoolUsedCount != 0) {
@@ -152,10 +152,9 @@ struct EventPoolImp : public EventPool {
std::queue<int> lastEventPoolOffsetUsed;
protected:
const uint32_t eventSize = 64u;
const uint32_t eventSize = 16u;
const uint32_t eventAlignment = MemoryConstants::cacheLineSize;
const uint32_t numEventTimestampTypes = 4u;
const int32_t numEventTimestampsToRead = 5u;
};
Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device) {
@@ -181,10 +180,10 @@ NEO::GraphicsAllocation &Event::getAllocation() {
return eventImp->eventPool->getAllocation();
}
// Returns the byte offset, relative to the event's base address, of the
// timestamp slot selected by the given index: the index multiplied by the
// owning pool's event size (as reported by getEventSize()).
//
// NOTE(review): this span comes from a diff view, so the signature and the
// return statement each appear twice (previous name first, renamed version
// second).
// NOTE(review): both operands are uint32_t, so the product is computed in
// 32 bits before being widened to the uint64_t return type.
uint64_t Event::getOffsetOfProfilingEvent(uint32_t profileEventType) {
uint64_t Event::getOffsetOfEventTimestampRegister(uint32_t eventTimestampReg) {
// The pool is reached through the concrete EventImp type.
auto eventImp = static_cast<EventImp *>(this);
auto eventSize = eventImp->eventPool->getEventSize();
return (profileEventType * eventSize);
return (eventTimestampReg * eventSize);
}
ze_result_t Event::destroy() {
@@ -207,10 +206,10 @@ void EventImp::makeAllocationResident() {
}
}
ze_result_t EventImp::hostEventSetValueTimestamps(uint32_t eventVal) {
for (uint32_t i = 0; i < this->eventPool->getNumEventTimestampTypes(); i++) {
ze_result_t EventImp::hostEventSetValueTimestamps(uint64_t eventVal) {
for (uint32_t i = 0; i < this->eventPool->getNumEventTimestampsToRead(); i++) {
auto baseAddr = reinterpret_cast<uint64_t>(hostAddress);
auto timeStampAddress = baseAddr + getOffsetOfProfilingEvent(i);
auto timeStampAddress = baseAddr + getOffsetOfEventTimestampRegister(i);
auto tsptr = reinterpret_cast<uint64_t *>(timeStampAddress);
*(tsptr) = eventVal;
@@ -225,7 +224,7 @@ ze_result_t EventImp::hostEventSetValueTimestamps(uint32_t eventVal) {
return ZE_RESULT_SUCCESS;
}
ze_result_t EventImp::hostEventSetValue(uint32_t eventVal) {
ze_result_t EventImp::hostEventSetValue(uint64_t eventVal) {
if (isTimestampEvent) {
hostEventSetValueTimestamps(eventVal);
}
@@ -292,14 +291,38 @@ ze_result_t EventImp::reset() {
// Copies the requested timestamp of this event into dstptr as one uint64_t
// (sizeof(uint64_t) bytes are written with memcpy_s).
//
// timestampType selects which stored slot(s) to read; dstptr is the
// destination buffer.
//
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when the event is not a timestamp
// event, otherwise ZE_RESULT_SUCCESS. When queryStatus() does not report
// success, the call still returns ZE_RESULT_SUCCESS but writes
// Event::STATE_INITIAL to dstptr instead of a timestamp.
//
// NOTE(review): this span comes from a diff view; the lines that call
// getOffsetOfProfilingEvent(), redeclare tsptr, and copy directly from tsptr
// appear to be the pre-change implementation shown next to the new one.
ze_result_t EventImp::getTimestamp(ze_event_timestamp_type_t timestampType, void *dstptr) {
auto baseAddr = reinterpret_cast<uint64_t>(hostAddress);
uint64_t *tsptr = nullptr;
// Value reported when the timestamps are not available yet.
uint64_t tsData = Event::STATE_INITIAL;
// Keeps only the low 32 bits of each stored slot.
constexpr uint64_t tsMask = (1ull << 32) - 1;
if (!this->isTimestampEvent)
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
auto timeStampAddress = baseAddr + getOffsetOfProfilingEvent(timestampType);
auto tsptr = reinterpret_cast<uint64_t *>(timeStampAddress);
// Ensure timestamps have been written; otherwise hand back the initial
// state value rather than reading the slots.
if (queryStatus() != ZE_RESULT_SUCCESS) {
memcpy_s(dstptr, sizeof(uint64_t), static_cast<void *>(&tsData), sizeof(uint64_t));
return ZE_RESULT_SUCCESS;
}
memcpy_s(dstptr, sizeof(uint64_t), static_cast<void *>(tsptr), sizeof(uint64_t));
// Global start is stored as two slots: the low 32 bits of the HIGH slot
// become the upper half of the result and the low 32 bits of the LOW slot
// become the lower half.
if (timestampType == ZE_EVENT_TIMESTAMP_GLOBAL_START) {
tsptr = reinterpret_cast<uint64_t *>(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_START_LOW));
auto tsptrUpper = reinterpret_cast<uint64_t *>(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_START_HIGH));
tsData = ((*tsptrUpper & tsMask) << 32) | (*tsptr & tsMask);
memcpy_s(dstptr, sizeof(uint64_t), static_cast<void *>(&tsData), sizeof(uint64_t));
return ZE_RESULT_SUCCESS;
}
// Every other type reads a single slot; any value that is not GLOBAL_END or
// CONTEXT_START falls through to the CONTEXT_END slot.
if (timestampType == ZE_EVENT_TIMESTAMP_GLOBAL_END) {
tsptr = reinterpret_cast<uint64_t *>(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_END));
} else if (timestampType == ZE_EVENT_TIMESTAMP_CONTEXT_START) {
tsptr = reinterpret_cast<uint64_t *>(baseAddr + getOffsetOfEventTimestampRegister(Event::CONTEXT_START));
} else {
tsptr = reinterpret_cast<uint64_t *>(baseAddr + getOffsetOfEventTimestampRegister(Event::CONTEXT_END));
}
// Single-slot results are truncated to their low 32 bits.
tsData = (*tsptr & tsMask);
memcpy_s(dstptr, sizeof(uint64_t), static_cast<void *>(&tsData), sizeof(uint64_t));
return ZE_RESULT_SUCCESS;
}
@@ -328,7 +351,7 @@ ze_result_t EventPoolImp::reserveEventFromPool(int index, Event *event) {
auto timestampMultiplier = 1;
if (static_cast<struct EventPool *>(this)->isEventPoolUsedForTimestamp) {
timestampMultiplier = numEventTimestampTypes;
timestampMultiplier = numEventTimestampsToRead;
}
uint64_t baseHostAddr = reinterpret_cast<uint64_t>(eventPoolAllocation->getUnderlyingBuffer());

View File

@@ -30,12 +30,20 @@ struct Event : _ze_event_handle_t {
virtual ze_result_t reset() = 0;
virtual ze_result_t getTimestamp(ze_event_timestamp_type_t timestampType, void *dstptr) = 0;
// Event state markers: STATE_SIGNALED is zero, STATE_CLEARED is the all-ones
// value of the underlying type, and STATE_INITIAL is an alias of
// STATE_CLEARED.
//
// NOTE(review): this span comes from a diff view, so the enum header and
// STATE_CLEARED each appear twice (32-bit form first, 64-bit form second).
enum State : uint32_t {
enum State : uint64_t {
STATE_SIGNALED = 0u,
STATE_CLEARED = static_cast<uint32_t>(-1),
STATE_CLEARED = static_cast<uint64_t>(-1),
STATE_INITIAL = STATE_CLEARED
};
// Index of each timestamp slot kept for a timestamp event. The byte offset of
// a slot from the event's base address is its index multiplied by the pool's
// event size (see getOffsetOfEventTimestampRegister()).
enum EventTimestampRegister : uint32_t {
    GLOBAL_START_LOW = 0u,  // filled from REG_GLOBAL_TIMESTAMP_LDW before the walker
    GLOBAL_START_HIGH = 1u, // filled from REG_GLOBAL_TIMESTAMP_UN before the walker
    GLOBAL_END = 2u,        // presumably written by the pipe-control post-sync operation; confirm in appendEventForProfiling
    CONTEXT_START = 3u,     // filled from GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW before the walker
    CONTEXT_END = 4u,       // filled from GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW after the walker
};
static Event *create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device);
static Event *fromHandle(ze_event_handle_t handle) { return static_cast<Event *>(handle); }
@@ -45,7 +53,7 @@ struct Event : _ze_event_handle_t {
NEO::GraphicsAllocation &getAllocation();
uint64_t getGpuAddress() { return gpuAddress; }
uint64_t getOffsetOfProfilingEvent(uint32_t profileEventType);
uint64_t getOffsetOfEventTimestampRegister(uint32_t eventTimestampReg);
void *hostAddress = nullptr;
uint64_t gpuAddress;
@@ -92,7 +100,7 @@ struct EventPool : _ze_event_pool_handle_t {
NEO::GraphicsAllocation &getAllocation() { return *eventPoolAllocation; }
virtual uint32_t getEventSize() = 0;
virtual uint32_t getNumEventTimestampTypes() = 0;
virtual uint32_t getNumEventTimestampsToRead() = 0;
bool isEventPoolUsedForTimestamp = false;