Change event memory layout for multi tile capable platforms
Related-To: NEO-6811
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
Commit 616dd55789 (parent 9e1bd1005b), committed by Compute-Runtime-Automation.
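In short: instead of tracking a separate partitionedEvent state, every event created on a multi tile capable platform now has usingContextEndOffset set (queried through the new L0HwHelper::multiTileCapablePlatform()), the event caches its eventPoolOffset, and the signal, wait, and query paths all add the context-end offset to the packet address. A minimal sketch of the resulting GPU-side address math, using only accessors that appear in this diff (illustrative, not code from the change itself):

    // Illustrative sketch of the new signal-address computation; `event` and
    // `device` are assumed to be a valid L0::Event and L0::Device.
    uint64_t baseAddr = event->getGpuAddress(device);   // pool base + eventPoolOffset
    if (event->isUsingContextEndOffset()) {             // timestamp event or multi tile capable platform
        baseAddr += event->getContextEndOffset();       // completion value lives at the context-end slot
    }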
@@ -292,7 +292,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
         callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
     }

-    if (event->useContextEndOffset()) {
+    if (event->isUsingContextEndOffset()) {
         baseAddr += event->getContextEndOffset();
     }

@@ -1641,6 +1641,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(ze_event_

     commandContainer.addToResidencyContainer(&event->getAllocation(this->device));
     uint64_t baseAddr = event->getGpuAddress(this->device);
+    if (event->isUsingContextEndOffset()) {
+        baseAddr += event->getContextEndOffset();
+    }

     const auto &hwInfo = this->device->getHwInfo();
     if (isCopyOnly()) {

@@ -1654,8 +1657,6 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(ze_event_
     if (this->partitionCount > 1) {
         args.workloadPartitionOffset = true;
         event->setPacketsInUse(this->partitionCount);
-        event->setPartitionedEvent(true);
-        baseAddr += event->getContextEndOffset();
     }
     NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
         *commandContainer.getCommandStream(),

@@ -1797,11 +1798,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
         callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
     }
     size_t eventSignalOffset = 0;
-    if (this->partitionCount > 1) {
-        event->setPartitionedEvent(true);
-        event->setPacketsInUse(this->partitionCount);
-    }
-    if (event->useContextEndOffset()) {
+
+    if (event->isUsingContextEndOffset()) {
         eventSignalOffset = event->getContextEndOffset();
     }

@@ -1815,7 +1813,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
     NEO::PipeControlArgs args;
     bool applyScope = event->signalScope;
     args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(applyScope, hwInfo);
-    args.workloadPartitionOffset = event->isPartitionedEvent();
+    if (this->partitionCount > 1) {
+        event->setPacketsInUse(this->partitionCount);
+        args.workloadPartitionOffset = true;
+    }
     if (applyScope || event->isEventTimestampFlagSet()) {
         NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
             *commandContainer.getCommandStream(),

@@ -1892,7 +1893,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
         gpuAddr = event->getGpuAddress(this->device);
         uint32_t packetsToWait = event->getPacketsInUse();

-        if (event->useContextEndOffset()) {
+        if (event->isUsingContextEndOffset()) {
             gpuAddr += event->getContextEndOffset();
         }
         for (uint32_t i = 0u; i < packetsToWait; i++) {

@@ -121,21 +121,22 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z

     updateStreamProperties(*kernel, false, isCooperative);
     NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
-        0, //eventAddress
-        neoDevice, //device
-        kernel, //dispatchInterface
-        reinterpret_cast<const void *>(pThreadGroupDimensions), //pThreadGroupDimensions
-        commandListPreemptionMode, //preemptionMode
-        0, //partitionCount
-        isIndirect, //isIndirect
-        isPredicate, //isPredicate
-        false, //isTimestampEvent
-        false, //L3FlushEnable
-        this->containsStatelessUncachedResource, //requiresUncachedMocs
-        false, //useGlobalAtomics
-        internalUsage, //isInternal
-        isCooperative //isCooperative
+        0, // eventAddress
+        neoDevice, // device
+        kernel, // dispatchInterface
+        reinterpret_cast<const void *>(pThreadGroupDimensions), // pThreadGroupDimensions
+        commandListPreemptionMode, // preemptionMode
+        0, // partitionCount
+        isIndirect, // isIndirect
+        isPredicate, // isPredicate
+        false, // isTimestampEvent
+        false, // L3FlushEnable
+        this->containsStatelessUncachedResource, // requiresUncachedMocs
+        false, // useGlobalAtomics
+        internalUsage, // isInternal
+        isCooperative // isCooperative
     };
+
     NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs);
     this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;

@@ -21,6 +21,7 @@
 #include "shared/source/xe_hp_core/hw_info.h"

 #include "level_zero/core/source/cmdlist/cmdlist_hw.h"
+#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
 #include "level_zero/core/source/kernel/kernel_imp.h"
 #include "level_zero/core/source/module/module.h"

@@ -94,7 +95,7 @@ void programEventL3Flush(ze_event_handle_t hEvent,
     auto eventPartitionOffset = (partitionCount > 1) ? (partitionCount * event->getSinglePacketSize())
                                                      : event->getSinglePacketSize();
     uint64_t eventAddress = event->getPacketAddress(device) + eventPartitionOffset;
-    if (event->useContextEndOffset()) {
+    if (event->isUsingContextEndOffset()) {
         eventAddress += event->getContextEndOffset();
     }

@@ -163,7 +164,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
         eventAlloc = &event->getAllocation(this->device);
         commandContainer.addToResidencyContainer(eventAlloc);
         L3FlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(event->signalScope, hwInfo);
-        isTimestampEvent = event->isEventTimestampFlagSet();
+        isTimestampEvent = event->isUsingContextEndOffset();
         eventAddress = event->getPacketAddress(this->device);
     }

@@ -214,20 +215,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
     this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();

     NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
-        eventAddress, //eventAddress
-        neoDevice, //device
-        kernel, //dispatchInterface
-        reinterpret_cast<const void *>(pThreadGroupDimensions), //pThreadGroupDimensions
-        commandListPreemptionMode, //preemptionMode
-        this->partitionCount, //partitionCount
-        isIndirect, //isIndirect
-        isPredicate, //isPredicate
-        isTimestampEvent, //isTimestampEvent
-        L3FlushEnable, //L3FlushEnable
-        this->containsStatelessUncachedResource, //requiresUncachedMocs
-        kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
-        internalUsage, //isInternal
-        isCooperative //isCooperative
+        eventAddress, // eventAddress
+        neoDevice, // device
+        kernel, // dispatchInterface
+        reinterpret_cast<const void *>(pThreadGroupDimensions), // pThreadGroupDimensions
+        commandListPreemptionMode, // preemptionMode
+        this->partitionCount, // partitionCount
+        isIndirect, // isIndirect
+        isPredicate, // isPredicate
+        isTimestampEvent, // isTimestampEvent
+        L3FlushEnable, // L3FlushEnable
+        this->containsStatelessUncachedResource, // requiresUncachedMocs
+        kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, // useGlobalAtomics
+        internalUsage, // isInternal
+        isCooperative // isCooperative
     };
     NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs);
     this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;

@@ -236,7 +237,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
         auto event = Event::fromHandle(hEvent);
         if (partitionCount > 1) {
             event->setPacketsInUse(partitionCount);
-            event->setPartitionedEvent(true);
         }
         if (L3FlushEnable) {
             programEventL3Flush<gfxCoreFamily>(hEvent, this->device, partitionCount, commandContainer);

@@ -88,14 +88,11 @@ struct Event : _ze_event_handle_t {
     bool isEventTimestampFlagSet() const {
         return isTimestampEvent;
     }
-    void setPartitionedEvent(bool partitionedEvent) {
-        this->partitionedEvent = partitionedEvent;
+    void setUsingContextEndOffset(bool usingContextEndOffset) {
+        this->usingContextEndOffset = usingContextEndOffset;
     }
-    bool isPartitionedEvent() const {
-        return partitionedEvent;
-    }
-    bool useContextEndOffset() const {
-        return isTimestampEvent || partitionedEvent;
+    bool isUsingContextEndOffset() const {
+        return isTimestampEvent || usingContextEndOffset;
     }

     uint64_t globalStartTS;

@@ -123,8 +120,9 @@ struct Event : _ze_event_handle_t {
     size_t globalEndOffset = 0u;
     size_t timestampSizeInDw = 0u;
     size_t singlePacketSize = 0u;
+    size_t eventPoolOffset = 0u;
     bool isTimestampEvent = false;
-    bool partitionedEvent = false;
+    bool usingContextEndOffset = false;
 };

 template <typename TagSizeT>

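The two struct hunks above replace the partitionedEvent flag with usingContextEndOffset and cache the event's byte offset within its pool. As a rough, hypothetical illustration of how the host-side completion address of a packet follows from these members (packetIndex is an assumed loop variable, not part of the change):

    // Illustrative sketch only; hostAddress, singlePacketSize, contextEndOffset
    // and usingContextEndOffset are the Event members shown above.
    void *packetAddr = ptrOffset(hostAddress, packetIndex * singlePacketSize);
    if (usingContextEndOffset) {
        packetAddr = ptrOffset(packetAddr, contextEndOffset);   // read/write the context-end value
    }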
@@ -8,6 +8,7 @@
 #include "shared/source/memory_manager/internal_allocation_storage.h"

 #include "level_zero/core/source/event/event.h"
+#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"

 namespace L0 {
 template <typename TagSizeT>

@@ -18,15 +19,18 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
     if (eventPool->isEventPoolTimestampFlagSet()) {
         event->setEventTimestampFlag(true);
     }
+    auto neoDevice = device->getNEODevice();
     event->kernelEventCompletionData = std::make_unique<KernelEventCompletionData<TagSizeT>[]>(EventPacketsCount::maxKernelSplit);

-    auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex());
+    auto alloc = eventPool->getAllocation().getGraphicsAllocation(neoDevice->getRootDeviceIndex());

     uint64_t baseHostAddr = reinterpret_cast<uint64_t>(alloc->getUnderlyingBuffer());
-    event->hostAddress = reinterpret_cast<void *>(baseHostAddr + (desc->index * eventPool->getEventSize()));
+    event->eventPoolOffset = desc->index * eventPool->getEventSize();
+    event->hostAddress = reinterpret_cast<void *>(baseHostAddr + event->eventPoolOffset);
     event->signalScope = desc->signal;
     event->waitScope = desc->wait;
-    event->csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
+    event->csr = neoDevice->getDefaultEngine().commandStreamReceiver;
+    event->setUsingContextEndOffset(L0HwHelper::get(neoDevice->getHardwareInfo().platform.eRenderCoreFamily).multiTileCapablePlatform());

     EventPoolImp *EventPoolImp = static_cast<struct EventPoolImp *>(eventPool);
     // do not reset even if it has been imported, since event pool

@@ -41,7 +45,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
 template <typename TagSizeT>
 uint64_t EventImp<TagSizeT>::getGpuAddress(Device *device) {
     auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex());
-    return (alloc->getGpuAddress() + (index * eventPool->getEventSize()));
+    return (alloc->getGpuAddress() + this->eventPoolOffset);
 }

 template <typename TagSizeT>

@@ -120,7 +124,7 @@ ze_result_t EventImp<TagSizeT>::queryStatusNonTimestamp() {
     for (uint32_t i = 0; i < kernelCount; i++) {
         uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed();
         for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) {
-            void const *queryAddress = partitionedEvent
+            void const *queryAddress = usingContextEndOffset
                                            ? kernelEventCompletionData[i].getContextEndAddress(packetId)
                                            : kernelEventCompletionData[i].getContextStartAddress(packetId);
             bool ready = NEO::WaitUtils::waitFunctionWithPredicate<const TagSizeT>(

@@ -138,9 +142,11 @@ ze_result_t EventImp<TagSizeT>::queryStatusNonTimestamp() {

 template <typename TagSizeT>
 ze_result_t EventImp<TagSizeT>::queryStatus() {
-    TagSizeT *hostAddr = static_cast<TagSizeT *>(hostAddress);
-
     if (metricStreamer != nullptr) {
+        TagSizeT *hostAddr = static_cast<TagSizeT *>(hostAddress);
+        if (usingContextEndOffset) {
+            hostAddr = ptrOffset(hostAddr, this->getContextEndOffset());
+        }
         *hostAddr = metricStreamer->getNotificationState();
     }
     this->csr->downloadAllocations();

@@ -185,21 +191,21 @@ ze_result_t EventImp<TagSizeT>::hostEventSetValueTimestamps(TagSizeT eventVal) {

 template <typename TagSizeT>
 ze_result_t EventImp<TagSizeT>::hostEventSetValue(TagSizeT eventVal) {
-    UNRECOVERABLE_IF(hostAddress == nullptr);

     if (isEventTimestampFlagSet()) {
         return hostEventSetValueTimestamps(eventVal);
     }

     auto packetHostAddr = hostAddress;
+    UNRECOVERABLE_IF(packetHostAddr == nullptr);
+    if (usingContextEndOffset) {
+        packetHostAddr = ptrOffset(packetHostAddr, contextEndOffset);
+    }
+
     for (uint32_t i = 0; i < kernelCount; i++) {
         uint32_t packetsToSet = kernelEventCompletionData[i].getPacketsUsed();
         for (uint32_t j = 0; j < packetsToSet; j++) {
             memcpy_s(packetHostAddr, sizeof(TagSizeT), static_cast<void *>(&eventVal), sizeof(TagSizeT));
-            if (isPartitionedEvent()) {
-                void *packetContextEndAddr = ptrOffset(packetHostAddr, contextEndOffset);
-                memcpy_s(packetContextEndAddr, sizeof(TagSizeT), static_cast<void *>(&eventVal), sizeof(TagSizeT));
-            }
             packetHostAddr = ptrOffset(packetHostAddr, singlePacketSize);
         }
     }

@@ -268,10 +274,8 @@ ze_result_t EventImp<TagSizeT>::reset() {
             kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount);
         }
     }
-    partitionedEvent = true;
    hostEventSetValue(Event::STATE_INITIAL);
    resetPackets();
-    partitionedEvent = false;
    return ZE_RESULT_SUCCESS;
 }

@@ -39,6 +39,7 @@ class L0HwHelper {

     virtual void getAttentionBitmaskForSingleThreads(std::vector<ze_device_thread_t> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const = 0;
     virtual std::vector<ze_device_thread_t> getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize) const = 0;
+    virtual bool multiTileCapablePlatform() const = 0;

   protected:
     L0HwHelper() = default;

@@ -62,6 +63,7 @@ class L0HwHelperHw : public L0HwHelper {
     bool isIpSamplingSupported(const NEO::HardwareInfo &hwInfo) const override;
     void getAttentionBitmaskForSingleThreads(std::vector<ze_device_thread_t> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const override;
     std::vector<ze_device_thread_t> getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, const uint8_t *bitmask, const size_t bitmaskSize) const override;
+    bool multiTileCapablePlatform() const override;
 };

 } // namespace L0

@@ -13,4 +13,9 @@ template <typename GfxFamily>
 void L0HwHelperHw<GfxFamily>::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupType groupType) const {
 }

+template <typename Family>
+bool L0HwHelperHw<Family>::multiTileCapablePlatform() const {
+    return false;
+}
+
 } // namespace L0

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2021 Intel Corporation
+ * Copyright (C) 2021-2022 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *

@@ -27,6 +27,11 @@ bool L0HwHelperHw<Family>::isResumeWARequired() {
     return true;
 }

+template <>
+bool L0HwHelperHw<Family>::multiTileCapablePlatform() const {
+    return true;
+}
+
 // clang-format off
 #include "level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_plus.inl"
 // clang-format on

@@ -29,6 +29,11 @@ bool L0HwHelperHw<Family>::isIpSamplingSupported(const NEO::HardwareInfo &hwInfo
     return NEO::PVC::isXt(hwInfo);
 }

+template <>
+bool L0HwHelperHw<Family>::multiTileCapablePlatform() const {
+    return true;
+}
+
 template class L0HwHelperHw<Family>;

 } // namespace L0