Add split implementation for append page fault copy
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
parent
137790def2
commit
934939c8b6
|
@ -10,6 +10,7 @@
|
||||||
#include "shared/source/helpers/pipe_control_args.h"
|
#include "shared/source/helpers/pipe_control_args.h"
|
||||||
#include "shared/source/helpers/vec.h"
|
#include "shared/source/helpers/vec.h"
|
||||||
#include "shared/source/kernel/kernel_arg_descriptor.h"
|
#include "shared/source/kernel/kernel_arg_descriptor.h"
|
||||||
|
#include "shared/source/memory_manager/memory_pool.h"
|
||||||
|
|
||||||
#include "level_zero/core/source/builtin/builtin_functions_lib.h"
|
#include "level_zero/core/source/builtin/builtin_functions_lib.h"
|
||||||
#include "level_zero/core/source/cmdlist/cmdlist_imp.h"
|
#include "level_zero/core/source/cmdlist/cmdlist_imp.h"
|
||||||
|
@ -245,6 +246,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||||
|
|
||||||
size_t getTotalSizeForCopyRegion(const ze_copy_region_t *region, uint32_t pitch, uint32_t slicePitch);
|
size_t getTotalSizeForCopyRegion(const ze_copy_region_t *region, uint32_t pitch, uint32_t slicePitch);
|
||||||
bool isAppendSplitNeeded(void *dstPtr, const void *srcPtr, size_t size);
|
bool isAppendSplitNeeded(void *dstPtr, const void *srcPtr, size_t size);
|
||||||
|
bool isAppendSplitNeeded(NEO::MemoryPool dstPool, NEO::MemoryPool srcPool, size_t size);
|
||||||
|
|
||||||
void applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes,
|
void applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes,
|
||||||
const void **pRanges);
|
const void **pRanges);
|
||||||
|
|
|
@ -2391,17 +2391,22 @@ inline size_t CommandListCoreFamily<gfxCoreFamily>::getTotalSizeForCopyRegion(co
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
bool CommandListCoreFamily<gfxCoreFamily>::isAppendSplitNeeded(void *dstPtr, const void *srcPtr, size_t size) {
|
bool CommandListCoreFamily<gfxCoreFamily>::isAppendSplitNeeded(void *dstPtr, const void *srcPtr, size_t size) {
|
||||||
constexpr size_t minimalSizeForBcsSplit = 4 * MemoryConstants::megaByte;
|
|
||||||
|
|
||||||
auto dstAllocationStruct = getAlignedAllocation(this->device, dstPtr, size, false);
|
auto dstAllocationStruct = getAlignedAllocation(this->device, dstPtr, size, false);
|
||||||
auto srcAllocationStruct = getAlignedAllocation(this->device, srcPtr, size, true);
|
auto srcAllocationStruct = getAlignedAllocation(this->device, srcPtr, size, true);
|
||||||
auto dstMemoryPool = dstAllocationStruct.alloc->getMemoryPool();
|
auto dstMemoryPool = dstAllocationStruct.alloc->getMemoryPool();
|
||||||
auto srcMemoryPool = srcAllocationStruct.alloc->getMemoryPool();
|
auto srcMemoryPool = srcAllocationStruct.alloc->getMemoryPool();
|
||||||
|
|
||||||
|
return this->isAppendSplitNeeded(dstMemoryPool, srcMemoryPool, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
inline bool CommandListCoreFamily<gfxCoreFamily>::isAppendSplitNeeded(NEO::MemoryPool dstPool, NEO::MemoryPool srcPool, size_t size) {
|
||||||
|
constexpr size_t minimalSizeForBcsSplit = 4 * MemoryConstants::megaByte;
|
||||||
|
|
||||||
return this->isBcsSplitNeeded &&
|
return this->isBcsSplitNeeded &&
|
||||||
size >= minimalSizeForBcsSplit &&
|
size >= minimalSizeForBcsSplit &&
|
||||||
((!NEO::MemoryPoolHelper::isSystemMemoryPool(dstMemoryPool) && NEO::MemoryPoolHelper::isSystemMemoryPool(srcMemoryPool)) ||
|
((!NEO::MemoryPoolHelper::isSystemMemoryPool(dstPool) && NEO::MemoryPoolHelper::isSystemMemoryPool(srcPool)) ||
|
||||||
(!NEO::MemoryPoolHelper::isSystemMemoryPool(srcMemoryPool) && NEO::MemoryPoolHelper::isSystemMemoryPool(dstMemoryPool)));
|
(!NEO::MemoryPoolHelper::isSystemMemoryPool(srcPool) && NEO::MemoryPoolHelper::isSystemMemoryPool(dstPool)));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
|
|
@ -227,7 +227,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
|
||||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent,
|
ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent,
|
||||||
numWaitEvents, phWaitEvents);
|
numWaitEvents, phWaitEvents);
|
||||||
}
|
}
|
||||||
|
|
||||||
return flushImmediate(ret, true);
|
return flushImmediate(ret, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -320,7 +319,20 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
|
||||||
checkAvailableSpace();
|
checkAvailableSpace();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost);
|
ze_result_t ret;
|
||||||
|
|
||||||
|
if (this->isAppendSplitNeeded(dstAllocation->getMemoryPool(), srcAllocation->getMemoryPool(), size)) {
|
||||||
|
uintptr_t dstAddress = static_cast<uintptr_t>(dstAllocation->getGpuAddress());
|
||||||
|
uintptr_t srcAddress = static_cast<uintptr_t>(srcAllocation->getGpuAddress());
|
||||||
|
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, uintptr_t, uintptr_t>(this, dstAddress, srcAddress, size, nullptr, [&](uintptr_t dstAddressParam, uintptr_t srcAddressParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
|
||||||
|
this->appendMemoryCopyBlit(dstAddressParam, dstAllocation, 0u,
|
||||||
|
srcAddressParam, srcAllocation, 0u,
|
||||||
|
sizeParam);
|
||||||
|
return this->appendSignalEvent(hSignalEventParam);
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost);
|
||||||
|
}
|
||||||
return flushImmediate(ret, false);
|
return flushImmediate(ret, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -702,5 +702,59 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe
|
||||||
EXPECT_EQ(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.events.createdFromLatestPool, 10u);
|
EXPECT_EQ(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.events.createdFromLatestPool, 10u);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// With SplitBcsCopy enabled, appends an 8 MB page-fault copy from a device
// allocation to a host allocation on an immediate copy command list and checks
// that every BCS split command queue goes from task count 0 to task count 1.
HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhenAppendingPageFaultCopyThenSuccessIsReturned, IsXeHpcCore) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.SplitBcsCopy.set(1);

    ze_result_t returnValue = ZE_RESULT_SUCCESS;
    auto hwInfo = *NEO::defaultHwInfo;
    hwInfo.featureTable.ftrBcsInfo = 0b111111111;
    hwInfo.capabilityTable.blitterOperationsSupported = true;
    auto testNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo);
    auto testL0Device = std::unique_ptr<L0::Device>(L0::Device::create(driverHandle.get(), testNeoDevice, false, &returnValue));
    ASSERT_NE(nullptr, testL0Device);

    ze_command_queue_desc_t desc = {};
    desc.ordinal = static_cast<uint32_t>(testNeoDevice->getEngineGroupIndexFromEngineGroupType(NEO::EngineGroupType::Copy));

    std::unique_ptr<L0::CommandList> commandList0(CommandList::createImmediate(productFamily,
                                                                               testL0Device.get(),
                                                                               &desc,
                                                                               false,
                                                                               NEO::EngineGroupType::Copy,
                                                                               returnValue));
    ASSERT_NE(nullptr, commandList0);

    auto &bcsSplit = static_cast<DeviceImp *>(testL0Device.get())->bcsSplit;
    // Fatal on purpose: the per-queue checks below are meaningless without all four queues.
    ASSERT_EQ(bcsSplit.cmdQs.size(), 4u);

    // Checks the task count of every split queue, naming the queue on failure.
    auto expectTaskCountOnAllSplitQueues = [&bcsSplit](auto expectedTaskCount) {
        for (size_t i = 0; i < bcsSplit.cmdQs.size(); i++) {
            EXPECT_EQ(static_cast<CommandQueueImp *>(bcsSplit.cmdQs[i])->getTaskCount(), expectedTaskCount) << "split queue " << i;
        }
    };
    expectTaskCountOnAllSplitQueues(0u);

    constexpr size_t alignment = 4096u;
    constexpr size_t size = 8 * MemoryConstants::megaByte;
    void *srcPtr = nullptr;
    void *dstPtr = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    ASSERT_EQ(ZE_RESULT_SUCCESS, context->allocDeviceMem(device->toHandle(),
                                                         &deviceDesc,
                                                         size, alignment, &srcPtr));
    ze_host_mem_alloc_desc_t hostDesc = {};
    ASSERT_EQ(ZE_RESULT_SUCCESS, context->allocHostMem(&hostDesc, size, alignment, &dstPtr));

    // Resolve both allocations up front so a failed lookup is reported as a test
    // failure rather than a null dereference.
    auto svmAllocsManager = testL0Device->getDriverHandle()->getSvmAllocsManager();
    auto dstAllocData = svmAllocsManager->getSVMAlloc(dstPtr);
    auto srcAllocData = svmAllocsManager->getSVMAlloc(srcPtr);
    ASSERT_NE(nullptr, dstAllocData);
    ASSERT_NE(nullptr, srcAllocData);

    auto result = commandList0->appendPageFaultCopy(dstAllocData->gpuAllocations.getDefaultGraphicsAllocation(),
                                                    srcAllocData->gpuAllocations.getDefaultGraphicsAllocation(),
                                                    size,
                                                    false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    expectTaskCountOnAllSplitQueues(1u);

    context->freeMem(srcPtr);
    context->freeMem(dstPtr);
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|
Loading…
Reference in New Issue