Revert "Change event memory layout for multi tile capable platforms"

This reverts commit 616dd55789.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2022-04-07 07:18:40 +02:00
committed by Compute-Runtime-Automation
parent 5b175ebb4a
commit b88e16252e
23 changed files with 105 additions and 337 deletions

View File

@@ -292,7 +292,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
}
if (event->isUsingContextEndOffset()) {
if (event->useContextEndOffset()) {
baseAddr += event->getContextEndOffset();
}
@@ -1641,9 +1641,6 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(ze_event_
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
uint64_t baseAddr = event->getGpuAddress(this->device);
if (event->isUsingContextEndOffset()) {
baseAddr += event->getContextEndOffset();
}
const auto &hwInfo = this->device->getHwInfo();
if (isCopyOnly()) {
@@ -1657,6 +1654,8 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(ze_event_
if (this->partitionCount > 1) {
args.workloadPartitionOffset = true;
event->setPacketsInUse(this->partitionCount);
event->setPartitionedEvent(true);
baseAddr += event->getContextEndOffset();
}
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
*commandContainer.getCommandStream(),
@@ -1798,8 +1797,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
}
size_t eventSignalOffset = 0;
if (event->isUsingContextEndOffset()) {
if (this->partitionCount > 1) {
event->setPartitionedEvent(true);
event->setPacketsInUse(this->partitionCount);
}
if (event->useContextEndOffset()) {
eventSignalOffset = event->getContextEndOffset();
}
@@ -1813,10 +1815,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
NEO::PipeControlArgs args;
bool applyScope = event->signalScope;
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(applyScope, hwInfo);
if (this->partitionCount > 1) {
event->setPacketsInUse(this->partitionCount);
args.workloadPartitionOffset = true;
}
args.workloadPartitionOffset = event->isPartitionedEvent();
if (applyScope || event->isEventTimestampFlagSet()) {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
*commandContainer.getCommandStream(),
@@ -1893,7 +1892,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
gpuAddr = event->getGpuAddress(this->device);
uint32_t packetsToWait = event->getPacketsInUse();
if (event->isUsingContextEndOffset()) {
if (event->useContextEndOffset()) {
gpuAddr += event->getContextEndOffset();
}
for (uint32_t i = 0u; i < packetsToWait; i++) {

View File

@@ -121,22 +121,21 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
updateStreamProperties(*kernel, false, isCooperative);
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
0, // eventAddress
neoDevice, // device
kernel, // dispatchInterface
reinterpret_cast<const void *>(pThreadGroupDimensions), // pThreadGroupDimensions
commandListPreemptionMode, // preemptionMode
0, // partitionCount
isIndirect, // isIndirect
isPredicate, // isPredicate
false, // isTimestampEvent
false, // L3FlushEnable
this->containsStatelessUncachedResource, // requiresUncachedMocs
false, // useGlobalAtomics
internalUsage, // isInternal
isCooperative // isCooperative
0, //eventAddress
neoDevice, //device
kernel, //dispatchInterface
reinterpret_cast<const void *>(pThreadGroupDimensions), //pThreadGroupDimensions
commandListPreemptionMode, //preemptionMode
0, //partitionCount
isIndirect, //isIndirect
isPredicate, //isPredicate
false, //isTimestampEvent
false, //L3FlushEnable
this->containsStatelessUncachedResource, //requiresUncachedMocs
false, //useGlobalAtomics
internalUsage, //isInternal
isCooperative //isCooperative
};
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs);
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;

View File

@@ -21,7 +21,6 @@
#include "shared/source/xe_hp_core/hw_info.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
#include "level_zero/core/source/kernel/kernel_imp.h"
#include "level_zero/core/source/module/module.h"
@@ -95,7 +94,7 @@ void programEventL3Flush(ze_event_handle_t hEvent,
auto eventPartitionOffset = (partitionCount > 1) ? (partitionCount * event->getSinglePacketSize())
: event->getSinglePacketSize();
uint64_t eventAddress = event->getPacketAddress(device) + eventPartitionOffset;
if (event->isUsingContextEndOffset()) {
if (event->useContextEndOffset()) {
eventAddress += event->getContextEndOffset();
}
@@ -164,7 +163,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
eventAlloc = &event->getAllocation(this->device);
commandContainer.addToResidencyContainer(eventAlloc);
L3FlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(event->signalScope, hwInfo);
isTimestampEvent = event->isUsingContextEndOffset();
isTimestampEvent = event->isEventTimestampFlagSet();
eventAddress = event->getPacketAddress(this->device);
}
@@ -215,20 +214,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
eventAddress, // eventAddress
neoDevice, // device
kernel, // dispatchInterface
reinterpret_cast<const void *>(pThreadGroupDimensions), // pThreadGroupDimensions
commandListPreemptionMode, // preemptionMode
this->partitionCount, // partitionCount
isIndirect, // isIndirect
isPredicate, // isPredicate
isTimestampEvent, // isTimestampEvent
L3FlushEnable, // L3FlushEnable
this->containsStatelessUncachedResource, // requiresUncachedMocs
kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, // useGlobalAtomics
internalUsage, // isInternal
isCooperative // isCooperative
eventAddress, //eventAddress
neoDevice, //device
kernel, //dispatchInterface
reinterpret_cast<const void *>(pThreadGroupDimensions), //pThreadGroupDimensions
commandListPreemptionMode, //preemptionMode
this->partitionCount, //partitionCount
isIndirect, //isIndirect
isPredicate, //isPredicate
isTimestampEvent, //isTimestampEvent
L3FlushEnable, //L3FlushEnable
this->containsStatelessUncachedResource, //requiresUncachedMocs
kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
internalUsage, //isInternal
isCooperative //isCooperative
};
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs);
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
@@ -237,6 +236,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
auto event = Event::fromHandle(hEvent);
if (partitionCount > 1) {
event->setPacketsInUse(partitionCount);
event->setPartitionedEvent(true);
}
if (L3FlushEnable) {
programEventL3Flush<gfxCoreFamily>(hEvent, this->device, partitionCount, commandContainer);

View File

@@ -88,11 +88,14 @@ struct Event : _ze_event_handle_t {
// Returns the current value of the isTimestampEvent flag for this event.
bool isEventTimestampFlagSet() const {
return isTimestampEvent;
}
void setUsingContextEndOffset(bool usingContextEndOffset) {
this->usingContextEndOffset = usingContextEndOffset;
void setPartitionedEvent(bool partitionedEvent) {
this->partitionedEvent = partitionedEvent;
}
bool isUsingContextEndOffset() const {
return isTimestampEvent || usingContextEndOffset;
bool isPartitionedEvent() const {
return partitionedEvent;
}
// Returns true when either the isTimestampEvent flag or the partitionedEvent
// flag is set. Callers shown in this change add getContextEndOffset() to the
// event address when this returns true.
bool useContextEndOffset() const {
return isTimestampEvent || partitionedEvent;
}
uint64_t globalStartTS;
@@ -120,9 +123,8 @@ struct Event : _ze_event_handle_t {
size_t globalEndOffset = 0u;
size_t timestampSizeInDw = 0u;
size_t singlePacketSize = 0u;
size_t eventPoolOffset = 0u;
bool isTimestampEvent = false;
bool usingContextEndOffset = false;
bool partitionedEvent = false;
};
template <typename TagSizeT>

View File

@@ -8,7 +8,6 @@
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
namespace L0 {
template <typename TagSizeT>
@@ -19,18 +18,15 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
if (eventPool->isEventPoolTimestampFlagSet()) {
event->setEventTimestampFlag(true);
}
auto neoDevice = device->getNEODevice();
event->kernelEventCompletionData = std::make_unique<KernelEventCompletionData<TagSizeT>[]>(EventPacketsCount::maxKernelSplit);
auto alloc = eventPool->getAllocation().getGraphicsAllocation(neoDevice->getRootDeviceIndex());
auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex());
uint64_t baseHostAddr = reinterpret_cast<uint64_t>(alloc->getUnderlyingBuffer());
event->eventPoolOffset = desc->index * eventPool->getEventSize();
event->hostAddress = reinterpret_cast<void *>(baseHostAddr + event->eventPoolOffset);
event->hostAddress = reinterpret_cast<void *>(baseHostAddr + (desc->index * eventPool->getEventSize()));
event->signalScope = desc->signal;
event->waitScope = desc->wait;
event->csr = neoDevice->getDefaultEngine().commandStreamReceiver;
event->setUsingContextEndOffset(L0HwHelper::get(neoDevice->getHardwareInfo().platform.eRenderCoreFamily).multiTileCapablePlatform());
event->csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
EventPoolImp *EventPoolImp = static_cast<struct EventPoolImp *>(eventPool);
// do not reset even if it has been imported, since event pool
@@ -45,7 +41,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
template <typename TagSizeT>
uint64_t EventImp<TagSizeT>::getGpuAddress(Device *device) {
auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex());
return (alloc->getGpuAddress() + this->eventPoolOffset);
return (alloc->getGpuAddress() + (index * eventPool->getEventSize()));
}
template <typename TagSizeT>
@@ -124,7 +120,7 @@ ze_result_t EventImp<TagSizeT>::queryStatusNonTimestamp() {
for (uint32_t i = 0; i < kernelCount; i++) {
uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
void const *queryAddress = usingContextEndOffset
void const *queryAddress = partitionedEvent
? kernelEventCompletionData[i].getContextEndAddress(packetId)
: kernelEventCompletionData[i].getContextStartAddress(packetId);
bool ready = NEO::WaitUtils::waitFunctionWithPredicate<const TagSizeT>(
@@ -142,11 +138,9 @@ ze_result_t EventImp<TagSizeT>::queryStatusNonTimestamp() {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::queryStatus() {
TagSizeT *hostAddr = static_cast<TagSizeT *>(hostAddress);
if (metricStreamer != nullptr) {
TagSizeT *hostAddr = static_cast<TagSizeT *>(hostAddress);
if (usingContextEndOffset) {
hostAddr = ptrOffset(hostAddr, this->getContextEndOffset());
}
*hostAddr = metricStreamer->getNotificationState();
}
this->csr->downloadAllocations();
@@ -191,21 +185,21 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::hostEventSetValue(TagSizeT eventVal) {
UNRECOVERABLE_IF(hostAddress == nullptr);
if (isEventTimestampFlagSet()) {
return hostEventSetValueTimestamps(eventVal);
}
auto packetHostAddr = hostAddress;
if (usingContextEndOffset) {
packetHostAddr = ptrOffset(packetHostAddr, contextEndOffset);
}
UNRECOVERABLE_IF(packetHostAddr == nullptr);
for (uint32_t i = 0; i < kernelCount; i++) {
uint32_t packetsToSet = kernelEventCompletionData[i].getPacketsUsed();
for (uint32_t j = 0; j < packetsToSet; j++) {
memcpy_s(packetHostAddr, sizeof(TagSizeT), static_cast<void *>(&eventVal), sizeof(TagSizeT));
if (isPartitionedEvent()) {
void *packetContextEndAddr = ptrOffset(packetHostAddr, contextEndOffset);
memcpy_s(packetContextEndAddr, sizeof(TagSizeT), static_cast<void *>(&eventVal), sizeof(TagSizeT));
}
packetHostAddr = ptrOffset(packetHostAddr, singlePacketSize);
}
}
@@ -274,8 +268,10 @@ ze_result_t EventImp<TagSizeT>::reset() {
kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
}
}
partitionedEvent = true;
hostEventSetValue(Event::STATE_INITIAL);
resetPackets();
partitionedEvent = false;
return ZE_RESULT_SUCCESS;
}

View File

@@ -39,7 +39,6 @@ class L0HwHelper {
virtual void getAttentionBitmaskForSingleThreads(std::vector<ze_device_thread_t> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const = 0;
virtual std::vector<ze_device_thread_t> getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize) const = 0;
virtual bool multiTileCapablePlatform() const = 0;
protected:
L0HwHelper() = default;
@@ -63,7 +62,6 @@ class L0HwHelperHw : public L0HwHelper {
bool isIpSamplingSupported(const NEO::HardwareInfo &hwInfo) const override;
void getAttentionBitmaskForSingleThreads(std::vector<ze_device_thread_t> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const override;
std::vector<ze_device_thread_t> getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize) const override;
bool multiTileCapablePlatform() const override;
};
} // namespace L0

View File

@@ -13,9 +13,4 @@ template <typename GfxFamily>
void L0HwHelperHw<GfxFamily>::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupType groupType) const {
}
// Generic template definition of multiTileCapablePlatform(): always returns
// false, i.e. the family is reported as not multi-tile capable.
template <typename Family>
bool L0HwHelperHw<Family>::multiTileCapablePlatform() const {
return false;
}
} // namespace L0

View File

@@ -27,11 +27,6 @@ bool L0HwHelperHw<Family>::isResumeWARequired() {
return true;
}
// Explicit specialization of multiTileCapablePlatform() for the `Family` type
// in scope in this file: always returns true.
template <>
bool L0HwHelperHw<Family>::multiTileCapablePlatform() const {
return true;
}
// clang-format off
#include "level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_plus.inl"
// clang-format on

View File

@@ -29,11 +29,6 @@ bool L0HwHelperHw<Family>::isIpSamplingSupported(const NEO::HardwareInfo &hwInfo
return NEO::PVC::isXt(hwInfo);
}
// Explicit specialization of multiTileCapablePlatform() for the `Family` type
// in scope in this file: always returns true.
template <>
bool L0HwHelperHw<Family>::multiTileCapablePlatform() const {
return true;
}
template class L0HwHelperHw<Family>;
} // namespace L0