Add split implementation for append page fault copy

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2022-09-15 14:36:55 +00:00
committed by Compute-Runtime-Automation
parent 137790def2
commit 934939c8b6
4 changed files with 79 additions and 6 deletions

View File

@@ -10,6 +10,7 @@
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/helpers/vec.h"
#include "shared/source/kernel/kernel_arg_descriptor.h"
#include "shared/source/memory_manager/memory_pool.h"
#include "level_zero/core/source/builtin/builtin_functions_lib.h"
#include "level_zero/core/source/cmdlist/cmdlist_imp.h"
@@ -245,6 +246,7 @@ struct CommandListCoreFamily : CommandListImp {
size_t getTotalSizeForCopyRegion(const ze_copy_region_t *region, uint32_t pitch, uint32_t slicePitch);
// Pointer-based check: looks up the allocations behind dstPtr and srcPtr and
// delegates the decision to the memory-pool overload below.
bool isAppendSplitNeeded(void *dstPtr, const void *srcPtr, size_t size);
// Pool-based check: true only when isBcsSplitNeeded is set, size is at least
// 4 * MemoryConstants::megaByte, and exactly one of the two pools is reported
// as a system memory pool by NEO::MemoryPoolHelper::isSystemMemoryPool.
bool isAppendSplitNeeded(NEO::MemoryPool dstPool, NEO::MemoryPool srcPool, size_t size);
void applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes,
const void **pRanges);

View File

@@ -2391,17 +2391,22 @@ inline size_t CommandListCoreFamily<gfxCoreFamily>::getTotalSizeForCopyRegion(co
// Decides whether a copy described by raw pointers should be split.
//
// Looks up the allocation behind each pointer with getAlignedAllocation() and
// forwards the memory pools of both allocations to the pool-based overload,
// which holds the actual decision logic.
//
// dstPtr - destination pointer of the copy
// srcPtr - source pointer of the copy
// size   - number of bytes to copy
// Returns the result of isAppendSplitNeeded(dstPool, srcPool, size).
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamily<gfxCoreFamily>::isAppendSplitNeeded(void *dstPtr, const void *srcPtr, size_t size) {
    // The minimal-size threshold is checked by the pool-based overload, so no
    // local copy of that constant is kept here.
    auto dstAllocationStruct = getAlignedAllocation(this->device, dstPtr, size, false);
    auto srcAllocationStruct = getAlignedAllocation(this->device, srcPtr, size, true);

    // NOTE(review): alloc is dereferenced without a null check - presumably
    // getAlignedAllocation always yields a valid allocation here; confirm.
    auto dstMemoryPool = dstAllocationStruct.alloc->getMemoryPool();
    auto srcMemoryPool = srcAllocationStruct.alloc->getMemoryPool();

    return this->isAppendSplitNeeded(dstMemoryPool, srcMemoryPool, size);
}
// Decides whether a copy between two memory pools should be split.
//
// dstPool - memory pool of the destination allocation
// srcPool - memory pool of the source allocation
// size    - number of bytes to copy
// Returns true only when all of the following hold:
//   * isBcsSplitNeeded is set on this command list,
//   * size is at least 4 * MemoryConstants::megaByte,
//   * exactly one of the two pools is a system memory pool.
template <GFXCORE_FAMILY gfxCoreFamily>
inline bool CommandListCoreFamily<gfxCoreFamily>::isAppendSplitNeeded(NEO::MemoryPool dstPool, NEO::MemoryPool srcPool, size_t size) {
    constexpr size_t minimalSizeForBcsSplit = 4 * MemoryConstants::megaByte;

    if (!this->isBcsSplitNeeded || size < minimalSizeForBcsSplit) {
        return false;
    }

    const bool dstInSystemMemory = NEO::MemoryPoolHelper::isSystemMemoryPool(dstPool);
    const bool srcInSystemMemory = NEO::MemoryPoolHelper::isSystemMemoryPool(srcPool);

    // (!dst && src) || (!src && dst) is the same as "the two flags differ".
    return dstInSystemMemory != srcInSystemMemory;
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -227,7 +227,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent,
numWaitEvents, phWaitEvents);
}
return flushImmediate(ret, true);
}
@@ -320,7 +319,20 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
checkAvailableSpace();
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost);
ze_result_t ret;
if (this->isAppendSplitNeeded(dstAllocation->getMemoryPool(), srcAllocation->getMemoryPool(), size)) {
uintptr_t dstAddress = static_cast<uintptr_t>(dstAllocation->getGpuAddress());
uintptr_t srcAddress = static_cast<uintptr_t>(srcAllocation->getGpuAddress());
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, uintptr_t, uintptr_t>(this, dstAddress, srcAddress, size, nullptr, [&](uintptr_t dstAddressParam, uintptr_t srcAddressParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
this->appendMemoryCopyBlit(dstAddressParam, dstAllocation, 0u,
srcAddressParam, srcAllocation, 0u,
sizeParam);
return this->appendSignalEvent(hSignalEventParam);
});
} else {
ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost);
}
return flushImmediate(ret, false);
}