mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
feature: create single temporary allocation for bcs split
Related-To: NEO-14557 Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
aa2e76cae7
commit
6191f5aec8
@@ -33,6 +33,7 @@
|
||||
#include "shared/source/helpers/register_offsets.h"
|
||||
#include "shared/source/helpers/state_base_address_helper.h"
|
||||
#include "shared/source/helpers/surface_format_info.h"
|
||||
#include "shared/source/helpers/validators.h"
|
||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||
#include "shared/source/memory_manager/allocation_properties.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
@@ -1816,8 +1817,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
callId);
|
||||
}
|
||||
|
||||
auto dstAllocationStruct = getAlignedAllocationData(this->device, sharedSystemEnabled, dstptr, size, false, isCopyOffloadEnabled());
|
||||
auto srcAllocationStruct = getAlignedAllocationData(this->device, sharedSystemEnabled, srcptr, size, true, isCopyOffloadEnabled());
|
||||
auto allocSize = NEO::getIfValid(memoryCopyParams.bcsSplitTotalDstSize, size);
|
||||
auto dstAllocationStruct = getAlignedAllocationData(this->device, sharedSystemEnabled, NEO::getIfValid(memoryCopyParams.bcsSplitBaseDstPtr, dstptr), allocSize, false, isCopyOffloadEnabled());
|
||||
auto srcAllocationStruct = getAlignedAllocationData(this->device, sharedSystemEnabled, NEO::getIfValid(memoryCopyParams.bcsSplitBaseSrcPtr, srcptr), allocSize, true, isCopyOffloadEnabled());
|
||||
|
||||
if ((dstAllocationStruct.alloc == nullptr || srcAllocationStruct.alloc == nullptr) && (sharedSystemEnabled == false)) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
@@ -2054,11 +2056,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
||||
callId);
|
||||
}
|
||||
|
||||
size_t dstSize = this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch);
|
||||
size_t srcSize = this->getTotalSizeForCopyRegion(srcRegion, srcPitch, srcSlicePitch);
|
||||
size_t dstAllocSize = NEO::getIfValid(memoryCopyParams.bcsSplitTotalDstSize, this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch));
|
||||
size_t srcAllocSize = NEO::getIfValid(memoryCopyParams.bcsSplitTotalSrcSize, this->getTotalSizeForCopyRegion(srcRegion, srcPitch, srcSlicePitch));
|
||||
|
||||
auto dstAllocationStruct = getAlignedAllocationData(this->device, false, dstPtr, dstSize, false, isCopyOffloadEnabled());
|
||||
auto srcAllocationStruct = getAlignedAllocationData(this->device, false, srcPtr, srcSize, true, isCopyOffloadEnabled());
|
||||
auto dstAllocationStruct = getAlignedAllocationData(this->device, false, NEO::getIfValid(memoryCopyParams.bcsSplitBaseDstPtr, dstPtr), dstAllocSize, false, isCopyOffloadEnabled());
|
||||
auto srcAllocationStruct = getAlignedAllocationData(this->device, false, NEO::getIfValid(memoryCopyParams.bcsSplitBaseSrcPtr, srcPtr), srcAllocSize, true, isCopyOffloadEnabled());
|
||||
|
||||
UNRECOVERABLE_IF(srcSlicePitch && srcPitch == 0);
|
||||
Vec3<size_t> srcSize3 = {srcPitch ? srcPitch : srcRegion->width + srcRegion->originX,
|
||||
|
||||
@@ -252,7 +252,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
ze_result_t appendStagingMemoryCopy(void *dstptr, const void *srcptr, size_t size, ze_event_handle_t hSignalEvent, CmdListMemoryCopyParams &memoryCopyParams);
|
||||
ze_result_t stagingStatusToL0(const NEO::StagingTransferStatus &status) const;
|
||||
size_t estimateAdditionalSizeAppendRegularCommandLists(uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists);
|
||||
void setupFlagsForBcsSplit(CmdListMemoryCopyParams &memoryCopyParams, bool &hasStallingCmds, bool &copyOffloadFlush);
|
||||
void setupFlagsForBcsSplit(CmdListMemoryCopyParams &memoryCopyParams, bool &hasStallingCmds, bool &copyOffloadFlush, const void *srcPtr, void *dstPtr, size_t srcSize, size_t dstSize);
|
||||
|
||||
MOCKABLE_VIRTUAL void checkAssert();
|
||||
ComputeFlushMethodType computeFlushMethod = nullptr;
|
||||
|
||||
@@ -644,13 +644,18 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(ze_even
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamilyImmediate<gfxCoreFamily>::setupFlagsForBcsSplit(CmdListMemoryCopyParams &memoryCopyParams, bool &hasStallingCmds, bool &copyOffloadFlush) {
|
||||
void CommandListCoreFamilyImmediate<gfxCoreFamily>::setupFlagsForBcsSplit(CmdListMemoryCopyParams &memoryCopyParams, bool &hasStallingCmds, bool &copyOffloadFlush, const void *srcPtr, void *dstPtr, size_t srcSize, size_t dstSize) {
|
||||
memoryCopyParams.relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(1, false); // split generates more than 1 event
|
||||
memoryCopyParams.forceDisableCopyOnlyInOrderSignaling = true;
|
||||
memoryCopyParams.taskCountUpdateRequired = true;
|
||||
memoryCopyParams.copyOffloadAllowed = this->isCopyOffloadEnabled();
|
||||
copyOffloadFlush = memoryCopyParams.copyOffloadAllowed;
|
||||
hasStallingCmds = !memoryCopyParams.relaxedOrderingDispatch;
|
||||
|
||||
memoryCopyParams.bcsSplitBaseDstPtr = dstPtr;
|
||||
memoryCopyParams.bcsSplitBaseSrcPtr = srcPtr;
|
||||
memoryCopyParams.bcsSplitTotalDstSize = dstSize;
|
||||
memoryCopyParams.bcsSplitTotalSrcSize = srcSize;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -689,7 +694,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
|
||||
NEO::TransferDirection direction;
|
||||
auto isSplitNeeded = this->isAppendSplitNeeded(dstptr, srcptr, size, direction);
|
||||
if (isSplitNeeded) {
|
||||
setupFlagsForBcsSplit(memoryCopyParams, hasStallingCmds, copyOffloadFlush);
|
||||
setupFlagsForBcsSplit(memoryCopyParams, hasStallingCmds, copyOffloadFlush, srcptr, dstptr, size, size);
|
||||
|
||||
auto splitCall = [&](CommandListCoreFamilyImmediate<gfxCoreFamily> *subCmdList, void *dstptrParam, const void *srcptrParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
|
||||
return subCmdList->CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptrParam, srcptrParam, sizeParam, hSignalEventParam, 0u, nullptr, memoryCopyParams);
|
||||
@@ -742,7 +747,9 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
|
||||
NEO::TransferDirection direction;
|
||||
auto isSplitNeeded = this->isAppendSplitNeeded(dstPtr, srcPtr, this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch), direction);
|
||||
if (isSplitNeeded) {
|
||||
setupFlagsForBcsSplit(memoryCopyParams, hasStallingCmds, copyOffloadFlush);
|
||||
setupFlagsForBcsSplit(memoryCopyParams, hasStallingCmds, copyOffloadFlush, srcPtr, dstPtr,
|
||||
this->getTotalSizeForCopyRegion(srcRegion, srcPitch, srcSlicePitch),
|
||||
this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch));
|
||||
|
||||
auto splitCall = [&](CommandListCoreFamilyImmediate<gfxCoreFamily> *subCmdList, uint32_t dstOriginXParam, uint32_t srcOriginXParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
|
||||
ze_copy_region_t dstRegionLocal = {};
|
||||
|
||||
@@ -7,8 +7,14 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace L0 {
|
||||
struct CmdListMemoryCopyParams {
|
||||
const void *bcsSplitBaseSrcPtr = nullptr;
|
||||
void *bcsSplitBaseDstPtr = nullptr;
|
||||
size_t bcsSplitTotalSrcSize = 0;
|
||||
size_t bcsSplitTotalDstSize = 0;
|
||||
bool relaxedOrderingDispatch = false;
|
||||
bool forceDisableCopyOnlyInOrderSignaling = false;
|
||||
bool copyOffloadAllowed = false;
|
||||
|
||||
@@ -1370,6 +1370,40 @@ HWTEST2_F(AggregatedBcsSplitTests, givenMarkerEventWhenCheckingCompletionThenRes
|
||||
*cmdListHw->inOrderExecInfo->getBaseHostAddress() = 3;
|
||||
}
|
||||
|
||||
HWTEST2_F(AggregatedBcsSplitTests, givenUserPtrWhenAppendCalledThenCreateOnlyOneTempAlloc, IsAtLeastXeHpcCore) {
|
||||
auto ptr = allocHostMem();
|
||||
uint64_t hostPtr = 0;
|
||||
|
||||
auto cmdListHw = static_cast<WhiteBox<L0::CommandListCoreFamilyImmediate<FamilyType::gfxCoreFamily>> *>(cmdList.get());
|
||||
|
||||
auto &tempAllocList = device->getNEODevice()->getMemoryManager()->getTemporaryAllocationsList();
|
||||
|
||||
auto countElements = [&tempAllocList]() {
|
||||
auto current = tempAllocList.peekHead();
|
||||
uint32_t count = 0;
|
||||
while (current) {
|
||||
count++;
|
||||
current = current->next;
|
||||
}
|
||||
|
||||
return count;
|
||||
};
|
||||
|
||||
EXPECT_EQ(0u, countElements());
|
||||
|
||||
cmdListHw->appendMemoryCopy(ptr, &hostPtr, copySize, nullptr, 0, nullptr, copyParams);
|
||||
EXPECT_EQ(1u, countElements());
|
||||
|
||||
cmdListHw->hostSynchronize(1, true);
|
||||
EXPECT_EQ(0u, countElements());
|
||||
|
||||
ze_copy_region_t region = {0, 0, 0, static_cast<uint32_t>(copySize), 1, 1};
|
||||
cmdListHw->appendMemoryCopyRegion(ptr, &region, 0, 0, &hostPtr, &region, 0, 0, nullptr, 0, nullptr, copyParams);
|
||||
EXPECT_EQ(1u, countElements());
|
||||
|
||||
context->freeMem(ptr);
|
||||
}
|
||||
|
||||
HWTEST2_F(AggregatedBcsSplitTests, givenFullCmdBufferWhenAppendCalledThenAllocateNewBuffer, IsAtLeastXeHpcCore) {
|
||||
auto ptr = allocHostMem();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user