mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 07:14:10 +08:00
feature: new multitile post sync layout for immediate write [2/n]
No functional changes in this commit; this is preparatory work.
Related-To: NEO-7966
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
112bbec6e9
commit
3d49658f50
@@ -284,6 +284,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
void appendComputeBarrierCommand();
|
||||
NEO::PipeControlArgs createBarrierFlags();
|
||||
void appendMultiTileBarrier(NEO::Device &neoDevice);
|
||||
void appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds);
|
||||
size_t estimateBufferSizeMultiTileBarrier(const NEO::RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region);
|
||||
MOCKABLE_VIRTUAL AlignedAllocationData getAlignedAllocationData(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);
|
||||
|
||||
@@ -2320,6 +2320,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
|
||||
}
|
||||
|
||||
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
|
||||
bool workloadPartition = isTimestampEventForMultiTile(event);
|
||||
|
||||
appendDispatchOffsetRegister(workloadPartition, true);
|
||||
|
||||
if (beforeWalker) {
|
||||
event->resetKernelCountAndPacketUsedCount();
|
||||
@@ -2339,9 +2342,10 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
|
||||
|
||||
uint64_t baseAddr = event->getGpuAddress(this->device);
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(*commandContainer.getCommandStream(), baseAddr, false, rootDeviceEnvironment);
|
||||
bool workloadPartition = isTimestampEventForMultiTile(event);
|
||||
appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition);
|
||||
}
|
||||
|
||||
appendDispatchOffsetRegister(workloadPartition, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -293,4 +293,8 @@ inline NEO::PreemptionMode CommandListCoreFamily<gfxCoreFamily>::obtainKernelPre
|
||||
return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), flags);
|
||||
}
|
||||
|
||||
// Variant of appendDispatchOffsetRegister with an empty body: both arguments
// are ignored and no commands are emitted.
// NOTE(review): which core families pick up this variant is not visible in
// this hunk - confirm against the including file.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds) {
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -481,4 +481,13 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Eve
|
||||
}
|
||||
}
|
||||
|
||||
// Emits an offset-register dispatch into the command container's command
// stream, but only when workloadPartitionEvent is true AND
// NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() returns true.
// In every other case the function does nothing.
//
// workloadPartitionEvent - gate for the whole operation (see condition below).
// beforeProfilingCmds    - selects which offset is dispatched:
//                          true  -> ImplicitScalingDispatch::getTimeStampPostSyncOffset()
//                          false -> ImplicitScalingDispatch::getImmediateWritePostSyncOffset()
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds) {
|
||||
// Both conditions must hold; otherwise no command is added.
if (workloadPartitionEvent && NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
// Timestamp offset before the profiling commands, immediate-write offset after them.
auto offset = beforeProfilingCmds ? NEO::ImplicitScalingDispatch<GfxFamily>::getTimeStampPostSyncOffset() : NEO::ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset();
|
||||
|
||||
// Hand the chosen offset to the implicit-scaling helper together with the current command stream.
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(), offset);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -119,6 +119,9 @@ struct Event : _ze_event_handle_t {
|
||||
// Returns the current value of the singlePacketSize member.
// NOTE(review): presumably a size in bytes - confirm against the code that assigns it.
size_t getSinglePacketSize() const {
|
||||
return singlePacketSize;
|
||||
}
|
||||
// Overwrites the singlePacketSize member with the given value.
// size - new value stored as-is; no validation is performed here.
void setSinglePacketSize(size_t size) {
|
||||
singlePacketSize = size;
|
||||
}
|
||||
// Returns the current value of the timestampSizeInDw member.
// NOTE(review): "Dw" presumably means 32-bit dwords - confirm against the code that assigns it.
size_t getTimestampSizeInDw() const {
|
||||
return timestampSizeInDw;
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/timestamp_packet.h"
|
||||
|
||||
#include "level_zero/core/source/event/event.h"
|
||||
@@ -33,6 +34,10 @@ struct EventImp : public Event {
|
||||
globalEndOffset = NEO::TimestampPackets<TagSizeT>::getGlobalEndOffset();
|
||||
timestampSizeInDw = (sizeof(TagSizeT) / sizeof(uint32_t));
|
||||
singlePacketSize = NEO::TimestampPackets<TagSizeT>::getSinglePacketSize();
|
||||
|
||||
if (NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled()) {
|
||||
singlePacketSize = sizeof(uint64_t);
|
||||
}
|
||||
}
|
||||
|
||||
// Destructor override with an empty body; no explicit cleanup is performed here.
~EventImp() override {}
|
||||
|
||||
@@ -30,6 +30,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
|
||||
|
||||
if (eventPool->isEventPoolTimestampFlagSet()) {
|
||||
event->setEventTimestampFlag(true);
|
||||
event->setSinglePacketSize(NEO::TimestampPackets<TagSizeT>::getSinglePacketSize());
|
||||
}
|
||||
auto &hwInfo = neoDevice->getHardwareInfo();
|
||||
|
||||
@@ -51,7 +52,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
|
||||
event->kernelEventCompletionData =
|
||||
std::make_unique<KernelEventCompletionData<TagSizeT>[]>(event->maxKernelCount);
|
||||
|
||||
bool useContextEndOffset = eventPool->isImplicitScalingCapableFlagSet();
|
||||
bool useContextEndOffset = eventPool->isImplicitScalingCapableFlagSet() && !NEO::ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled();
|
||||
int32_t overrideUseContextEndOffset = NEO::DebugManager.flags.UseContextEndOffsetForEventCompletion.get();
|
||||
if (overrideUseContextEndOffset != -1) {
|
||||
useContextEndOffset = !!overrideUseContextEndOffset;
|
||||
|
||||
@@ -35,6 +35,10 @@ bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reports whether the dynamic post-sync allocation layout is enabled.
// Returns true only when the EnableDynamicPostSyncAllocLayout debug flag reads
// exactly 1; any other flag value yields false.
bool ApiSpecificConfig::isDynamicPostSyncAllocLayoutEnabled() {
|
||||
return (NEO::DebugManager.flags.EnableDynamicPostSyncAllocLayout.get() == 1);
|
||||
}
|
||||
|
||||
// Identifies the API flavour of this build: always returns ApiSpecificConfig::L0.
ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() {
|
||||
return ApiSpecificConfig::L0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user