feature: new multitile post sync layout for immediate write [2/n]

No functional changes in this commit. This is prework.

Related-To: NEO-7966

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-06-07 15:06:16 +00:00
committed by Compute-Runtime-Automation
parent 112bbec6e9
commit 3d49658f50
21 changed files with 195 additions and 4 deletions

View File

@@ -284,6 +284,7 @@ struct CommandListCoreFamily : CommandListImp {
void appendComputeBarrierCommand();
NEO::PipeControlArgs createBarrierFlags();
void appendMultiTileBarrier(NEO::Device &neoDevice);
void appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds);
size_t estimateBufferSizeMultiTileBarrier(const NEO::RootDeviceEnvironment &rootDeviceEnvironment);
uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region);
MOCKABLE_VIRTUAL AlignedAllocationData getAlignedAllocationData(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);

View File

@@ -2320,6 +2320,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
}
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
bool workloadPartition = isTimestampEventForMultiTile(event);
appendDispatchOffsetRegister(workloadPartition, true);
if (beforeWalker) {
event->resetKernelCountAndPacketUsedCount();
@@ -2339,9 +2342,10 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
uint64_t baseAddr = event->getGpuAddress(this->device);
NEO::MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(*commandContainer.getCommandStream(), baseAddr, false, rootDeviceEnvironment);
bool workloadPartition = isTimestampEventForMultiTile(event);
appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition);
}
appendDispatchOffsetRegister(workloadPartition, false);
}
}

View File

@@ -293,4 +293,8 @@ inline NEO::PreemptionMode CommandListCoreFamily<gfxCoreFamily>::obtainKernelPre
return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), flags);
}
// Variant of appendDispatchOffsetRegister with an empty body: it does nothing,
// regardless of the values of workloadPartitionEvent and beforeProfilingCmds.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds) {
}
} // namespace L0

View File

@@ -481,4 +481,13 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Eve
}
}
// Dispatches an offset-register update into the command stream, but only for
// workload-partitioned events while the dynamic post sync allocation layout is enabled.
// beforeProfilingCmds == true  -> the timestamp post sync offset is dispatched;
// beforeProfilingCmds == false -> the immediate write post sync offset is dispatched.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds) {
    // Nothing to dispatch unless both conditions hold; the config query is
    // only evaluated for workload-partitioned events.
    if (!workloadPartitionEvent || !NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
        return;
    }

    using ImplicitScaling = NEO::ImplicitScalingDispatch<GfxFamily>;

    auto postSyncOffset = beforeProfilingCmds ? ImplicitScaling::getTimeStampPostSyncOffset()
                                              : ImplicitScaling::getImmediateWritePostSyncOffset();
    auto &cmdStream = *commandContainer.getCommandStream();
    ImplicitScaling::dispatchOffsetRegister(cmdStream, postSyncOffset);
}
} // namespace L0

View File

@@ -119,6 +119,9 @@ struct Event : _ze_event_handle_t {
size_t getSinglePacketSize() const {
return singlePacketSize;
}
void setSinglePacketSize(size_t size) {
singlePacketSize = size;
}
size_t getTimestampSizeInDw() const {
return timestampSizeInDw;
}

View File

@@ -6,6 +6,7 @@
*/
#pragma once
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/timestamp_packet.h"
#include "level_zero/core/source/event/event.h"
@@ -33,6 +34,10 @@ struct EventImp : public Event {
globalEndOffset = NEO::TimestampPackets<TagSizeT>::getGlobalEndOffset();
timestampSizeInDw = (sizeof(TagSizeT) / sizeof(uint32_t));
singlePacketSize = NEO::TimestampPackets<TagSizeT>::getSinglePacketSize();
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
singlePacketSize = sizeof(uint64_t);
}
}
// Destructor override with an empty body; no explicit cleanup is performed here.
~EventImp() override {}

View File

@@ -30,6 +30,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
if (eventPool->isEventPoolTimestampFlagSet()) {
event->setEventTimestampFlag(true);
event->setSinglePacketSize(NEO::TimestampPackets<TagSizeT>::getSinglePacketSize());
}
auto &hwInfo = neoDevice->getHardwareInfo();
@@ -51,7 +52,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
event->kernelEventCompletionData =
std::make_unique<KernelEventCompletionData<TagSizeT>[]>(event->maxKernelCount);
bool useContextEndOffset = eventPool->isImplicitScalingCapableFlagSet();
bool useContextEndOffset = eventPool->isImplicitScalingCapableFlagSet() && !NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled();
int32_t overrideUseContextEndOffset = NEO::DebugManager.flags.UseContextEndOffsetForEventCompletion.get();
if (overrideUseContextEndOffset != -1) {
useContextEndOffset = !!overrideUseContextEndOffset;

View File

@@ -35,6 +35,10 @@ bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() {
return false;
}
// Reports whether the dynamic post sync allocation layout is enabled.
// True only when the EnableDynamicPostSyncAllocLayout debug flag reads exactly 1.
bool ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() {
    const auto flagValue = NEO::DebugManager.flags.EnableDynamicPostSyncAllocLayout.get();
    return flagValue == 1;
}
// Identifies the API type for this configuration: always ApiSpecificConfig::L0.
ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() {
    const ApiSpecificConfig::ApiType apiType = ApiSpecificConfig::L0;
    return apiType;
}