mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-29 09:03:14 +08:00
Add bcs split implementation for memory copy region
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
6dacab1c02
commit
3d4b4b5746
@@ -243,6 +243,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
void updateStreamProperties(Kernel &kernel, bool isCooperative);
|
||||
void clearCommandsToPatch();
|
||||
|
||||
size_t getTotalSizeForCopyRegion(const ze_copy_region_t *region, uint32_t pitch, uint32_t slicePitch);
|
||||
bool isAppendSplitNeeded(void *dstPtr, const void *srcPtr, size_t size);
|
||||
|
||||
void applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes,
|
||||
|
||||
@@ -1269,20 +1269,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
||||
callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
|
||||
}
|
||||
|
||||
size_t dstSize = 0;
|
||||
size_t srcSize = 0;
|
||||
|
||||
if (srcRegion->depth > 1) {
|
||||
uint32_t hostPtrDstOffset = dstRegion->originX + ((dstRegion->originY) * dstPitch) + ((dstRegion->originZ) * dstSlicePitch);
|
||||
uint32_t hostPtrSrcOffset = srcRegion->originX + ((srcRegion->originY) * srcPitch) + ((srcRegion->originZ) * srcSlicePitch);
|
||||
dstSize = (dstRegion->width * dstRegion->height * dstRegion->depth) + hostPtrDstOffset;
|
||||
srcSize = (srcRegion->width * srcRegion->height * srcRegion->depth) + hostPtrSrcOffset;
|
||||
} else {
|
||||
uint32_t hostPtrDstOffset = dstRegion->originX + ((dstRegion->originY) * dstPitch);
|
||||
uint32_t hostPtrSrcOffset = srcRegion->originX + ((srcRegion->originY) * srcPitch);
|
||||
dstSize = (dstRegion->width * dstRegion->height) + hostPtrDstOffset;
|
||||
srcSize = (srcRegion->width * srcRegion->height) + hostPtrSrcOffset;
|
||||
}
|
||||
size_t dstSize = this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch);
|
||||
size_t srcSize = this->getTotalSizeForCopyRegion(srcRegion, srcPitch, srcSlicePitch);
|
||||
|
||||
auto dstAllocationStruct = getAlignedAllocation(this->device, dstPtr, dstSize, false);
|
||||
auto srcAllocationStruct = getAlignedAllocation(this->device, srcPtr, srcSize, true);
|
||||
@@ -2386,6 +2374,17 @@ void CommandListCoreFamily<gfxCoreFamily>::clearCommandsToPatch() {
|
||||
commandsToPatch.clear();
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
inline size_t CommandListCoreFamily<gfxCoreFamily>::getTotalSizeForCopyRegion(const ze_copy_region_t *region, uint32_t pitch, uint32_t slicePitch) {
|
||||
if (region->depth > 1) {
|
||||
uint32_t offset = region->originX + ((region->originY) * pitch) + ((region->originZ) * slicePitch);
|
||||
return (region->width * region->height * region->depth) + offset;
|
||||
} else {
|
||||
uint32_t offset = region->originX + ((region->originY) * pitch);
|
||||
return (region->width * region->height) + offset;
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamily<gfxCoreFamily>::isAppendSplitNeeded(void *dstPtr, const void *srcPtr, size_t size) {
|
||||
constexpr size_t minimalSizeForBcsSplit = 4 * MemoryConstants::megaByte;
|
||||
|
||||
@@ -220,7 +220,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
|
||||
ze_result_t ret;
|
||||
|
||||
if (this->isAppendSplitNeeded(dstptr, srcptr, size)) {
|
||||
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall(this, dstptr, srcptr, size, hSignalEvent, [&](void *dstptrParam, const void *srcptrParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
|
||||
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, void *, const void *>(this, dstptr, srcptr, size, hSignalEvent, [&](void *dstptrParam, const void *srcptrParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptrParam, srcptrParam, sizeParam, hSignalEventParam, numWaitEvents, phWaitEvents);
|
||||
});
|
||||
} else {
|
||||
@@ -248,9 +248,28 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
checkAvailableSpace();
|
||||
}
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(dstPtr, dstRegion, dstPitch, dstSlicePitch,
|
||||
srcPtr, srcRegion, srcPitch, srcSlicePitch,
|
||||
hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
|
||||
ze_result_t ret;
|
||||
|
||||
if (this->isAppendSplitNeeded(dstPtr, srcPtr, this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch))) {
|
||||
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall<gfxCoreFamily, uint32_t, uint32_t>(this, dstRegion->originX, srcRegion->originX, dstRegion->width, hSignalEvent, [&](uint32_t dstOriginXParam, uint32_t srcOriginXParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
|
||||
ze_copy_region_t dstRegionLocal = {};
|
||||
ze_copy_region_t srcRegionLocal = {};
|
||||
memcpy(&dstRegionLocal, dstRegion, sizeof(ze_copy_region_t));
|
||||
memcpy(&srcRegionLocal, srcRegion, sizeof(ze_copy_region_t));
|
||||
dstRegionLocal.originX = dstOriginXParam;
|
||||
dstRegionLocal.width = static_cast<uint32_t>(sizeParam);
|
||||
srcRegionLocal.originX = srcOriginXParam;
|
||||
srcRegionLocal.width = static_cast<uint32_t>(sizeParam);
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(dstPtr, &dstRegionLocal, dstPitch, dstSlicePitch,
|
||||
srcPtr, &srcRegionLocal, srcPitch, srcSlicePitch,
|
||||
hSignalEventParam, numWaitEvents, phWaitEvents);
|
||||
});
|
||||
} else {
|
||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(dstPtr, dstRegion, dstPitch, dstSlicePitch,
|
||||
srcPtr, srcRegion, srcPitch, srcSlicePitch,
|
||||
hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
}
|
||||
return flushImmediate(ret, true);
|
||||
}
|
||||
|
||||
|
||||
@@ -47,13 +47,13 @@ struct BcsSplit {
|
||||
std::vector<CommandQueue *> cmdQs;
|
||||
NEO::BcsInfoMask engines = NEO::EngineHelpers::oddLinkedCopyEnginesMask;
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
template <GFXCORE_FAMILY gfxCoreFamily, typename T, typename K>
|
||||
ze_result_t appendSplitCall(CommandListCoreFamilyImmediate<gfxCoreFamily> *cmdList,
|
||||
void *dstptr,
|
||||
const void *srcptr,
|
||||
T dstptr,
|
||||
K srcptr,
|
||||
size_t size,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
std::function<ze_result_t(void *, const void *, size_t, ze_event_handle_t)> appendCall) {
|
||||
std::function<ze_result_t(T, K, size_t, ze_event_handle_t)> appendCall) {
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
|
||||
if (hSignalEvent) {
|
||||
|
||||
@@ -467,6 +467,56 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe
|
||||
context->freeMem(dstPtr);
|
||||
}
|
||||
|
||||
// With SplitBcsCopy enabled, appending a memory copy region to an immediate copy
// command list must succeed and move the task count of every BCS split command
// queue from 0 to 1.
HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhenAppendingMemoryCopyRegionThenSuccessIsReturned, IsXeHpcCore) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.SplitBcsCopy.set(1);

    ze_result_t returnValue;
    auto hwInfo = *NEO::defaultHwInfo;
    hwInfo.featureTable.ftrBcsInfo = 0b111111111;
    hwInfo.capabilityTable.blitterOperationsSupported = true;
    auto testNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo);
    auto testL0Device = std::unique_ptr<L0::Device>(L0::Device::create(driverHandle.get(), testNeoDevice, false, &returnValue));

    ze_command_queue_desc_t desc = {};
    desc.ordinal = static_cast<uint32_t>(testNeoDevice->getEngineGroupIndexFromEngineGroupType(NEO::EngineGroupType::Copy));

    std::unique_ptr<L0::CommandList> commandList0(CommandList::createImmediate(productFamily,
                                                                               testL0Device.get(),
                                                                               &desc,
                                                                               false,
                                                                               NEO::EngineGroupType::Copy,
                                                                               returnValue));
    ASSERT_NE(nullptr, commandList0);

    // Fatal check: the loops below only make sense when all four split queues exist.
    auto &splitQueues = static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs;
    ASSERT_EQ(splitQueues.size(), 4u);
    for (auto *queue : splitQueues) {
        EXPECT_EQ(static_cast<CommandQueueImp *>(queue)->getTaskCount(), 0u);
    }

    constexpr size_t alignment = 4096u;
    constexpr size_t size = 8 * MemoryConstants::megaByte;
    void *srcPtr;
    void *dstPtr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    context->allocDeviceMem(device->toHandle(),
                            &deviceDesc,
                            size, alignment, &srcPtr);
    ze_host_mem_alloc_desc_t hostDesc = {};
    context->allocHostMem(&hostDesc, size, alignment, &dstPtr);
    ze_copy_region_t region = {2, 1, 1, 4 * MemoryConstants::megaByte, 1, 1};

    // The same region describes both the destination and the source of the copy.
    auto result = commandList0->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, nullptr, 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // Each split queue is expected to have taken exactly one submission.
    for (auto *queue : splitQueues) {
        EXPECT_EQ(static_cast<CommandQueueImp *>(queue)->getTaskCount(), 1u);
    }

    context->freeMem(srcPtr);
    context->freeMem(dstPtr);
}
|
||||
|
||||
HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhenAppendingMemoryCopyWithEventThenSuccessIsReturnedAndMiFlushProgrammed, IsXeHpcCore) {
|
||||
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user