Change event memory layout for multi-tile capable platforms

Related-To: NEO-6811

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-04-04 19:07:08 +00:00
committed by Compute-Runtime-Automation
parent fd45ac133d
commit 7d79c64c35
23 changed files with 337 additions and 105 deletions

View File

@@ -88,14 +88,11 @@ struct Event : _ze_event_handle_t {
// Reports whether the timestamp flag is set on this event by returning the
// current value of isTimestampEvent.
bool isEventTimestampFlagSet() const {
return isTimestampEvent;
}
void setPartitionedEvent(bool partitionedEvent) {
this->partitionedEvent = partitionedEvent;
void setUsingContextEndOffset(bool usingContextEndOffset) {
this->usingContextEndOffset = usingContextEndOffset;
}
bool isPartitionedEvent() const {
return partitionedEvent;
}
bool useContextEndOffset() const {
return isTimestampEvent || partitionedEvent;
bool isUsingContextEndOffset() const {
return isTimestampEvent || usingContextEndOffset;
}
uint64_t globalStartTS;
@@ -123,8 +120,9 @@ struct Event : _ze_event_handle_t {
size_t globalEndOffset = 0u;
size_t timestampSizeInDw = 0u;
size_t singlePacketSize = 0u;
size_t eventPoolOffset = 0u;
bool isTimestampEvent = false;
bool partitionedEvent = false;
bool usingContextEndOffset = false;
};
template <typename TagSizeT>

View File

@@ -8,6 +8,7 @@
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
namespace L0 {
template <typename TagSizeT>
@@ -18,15 +19,18 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
if (eventPool->isEventPoolTimestampFlagSet()) {
event->setEventTimestampFlag(true);
}
auto neoDevice = device->getNEODevice();
event->kernelEventCompletionData = std::make_unique<KernelEventCompletionData<TagSizeT>[]>(EventPacketsCount::maxKernelSplit);
auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex());
auto alloc = eventPool->getAllocation().getGraphicsAllocation(neoDevice->getRootDeviceIndex());
uint64_t baseHostAddr = reinterpret_cast<uint64_t>(alloc->getUnderlyingBuffer());
event->hostAddress = reinterpret_cast<void *>(baseHostAddr + (desc->index * eventPool->getEventSize()));
event->eventPoolOffset = desc->index * eventPool->getEventSize();
event->hostAddress = reinterpret_cast<void *>(baseHostAddr + event->eventPoolOffset);
event->signalScope = desc->signal;
event->waitScope = desc->wait;
event->csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
event->csr = neoDevice->getDefaultEngine().commandStreamReceiver;
event->setUsingContextEndOffset(L0HwHelper::get(neoDevice->getHardwareInfo().platform.eRenderCoreFamily).multiTileCapablePlatform());
EventPoolImp *EventPoolImp = static_cast<struct EventPoolImp *>(eventPool);
// do not reset even if it has been imported, since event pool
@@ -41,7 +45,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
// Computes the GPU address of this event.
//
// Looks up the event pool's graphics allocation for the root device index of
// the given device and adds this event's offset to that allocation's GPU
// address.
//
// NOTE(review): two return statements are visible here because this view shows
// the pre-change and post-change lines of a diff side by side. Presumably only
// the eventPoolOffset form is meant to remain after the change — confirm
// against the actual file before relying on either line.
template <typename TagSizeT>
uint64_t EventImp<TagSizeT>::getGpuAddress(Device *device) {
// Select the allocation that belongs to the root device of the caller's device.
auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex());
// Offset derived on the fly from the event index and the pool's per-event size.
return (alloc->getGpuAddress() + (index * eventPool->getEventSize()));
// Offset taken from the eventPoolOffset member instead of being recomputed.
return (alloc->getGpuAddress() + this->eventPoolOffset);
}
template <typename TagSizeT>
@@ -120,7 +124,7 @@ ze_result_t EventImp<TagSizeT>::queryStatusNonTimestamp() {
for (uint32_t i = 0; i < kernelCount; i++) {
uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
void const *queryAddress = partitionedEvent
void const *queryAddress = usingContextEndOffset
? kernelEventCompletionData[i].getContextEndAddress(packetId)
: kernelEventCompletionData[i].getContextStartAddress(packetId);
bool ready = NEO::WaitUtils::waitFunctionWithPredicate<const TagSizeT>(
@@ -138,9 +142,11 @@ ze_result_t EventImp<TagSizeT>::queryStatusNonTimestamp() {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::queryStatus() {
TagSizeT *hostAddr = static_cast<TagSizeT *>(hostAddress);
if (metricStreamer != nullptr) {
TagSizeT *hostAddr = static_cast<TagSizeT *>(hostAddress);
if (usingContextEndOffset) {
hostAddr = ptrOffset(hostAddr, this->getContextEndOffset());
}
*hostAddr = metricStreamer->getNotificationState();
}
this->csr->downloadAllocations();
@@ -185,21 +191,21 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::hostEventSetValue(TagSizeT eventVal) {
UNRECOVERABLE_IF(hostAddress == nullptr);
if (isEventTimestampFlagSet()) {
return hostEventSetValueTimestamps(eventVal);
}
auto packetHostAddr = hostAddress;
UNRECOVERABLE_IF(packetHostAddr == nullptr);
if (usingContextEndOffset) {
packetHostAddr = ptrOffset(packetHostAddr, contextEndOffset);
}
for (uint32_t i = 0; i < kernelCount; i++) {
uint32_t packetsToSet = kernelEventCompletionData[i].getPacketsUsed();
for (uint32_t j = 0; j < packetsToSet; j++) {
memcpy_s(packetHostAddr, sizeof(TagSizeT), static_cast<void *>(&eventVal), sizeof(TagSizeT));
if (isPartitionedEvent()) {
void *packetContextEndAddr = ptrOffset(packetHostAddr, contextEndOffset);
memcpy_s(packetContextEndAddr, sizeof(TagSizeT), static_cast<void *>(&eventVal), sizeof(TagSizeT));
}
packetHostAddr = ptrOffset(packetHostAddr, singlePacketSize);
}
}
@@ -268,10 +274,8 @@ ze_result_t EventImp<TagSizeT>::reset() {
kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
}
}
partitionedEvent = true;
hostEventSetValue(Event::STATE_INITIAL);
resetPackets();
partitionedEvent = false;
return ZE_RESULT_SUCCESS;
}