mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
feature: remote copy support for bcs split
Related-To: NEO-14557 Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
6acf58633d
commit
e2dff82741
@@ -319,7 +319,8 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
|
||||
size_t getTotalSizeForCopyRegion(const ze_copy_region_t *region, uint32_t pitch, uint32_t slicePitch);
|
||||
bool isAppendSplitNeeded(void *dstPtr, const void *srcPtr, size_t size, NEO::TransferDirection &directionOut);
|
||||
bool isAppendSplitNeeded(NEO::MemoryPool dstPool, NEO::MemoryPool srcPool, size_t size, NEO::TransferDirection &directionOut);
|
||||
bool isAppendSplitNeeded(NEO::MemoryPool dstPool, NEO::MemoryPool srcPool, size_t size, NEO::TransferDirection &directionOut, bool remoteCopy);
|
||||
bool isAppendSplitRemote(NEO::SvmAllocationData *allocData, void *ptr) const;
|
||||
|
||||
void applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes,
|
||||
const void **pRanges);
|
||||
|
||||
@@ -3924,6 +3924,18 @@ inline NEO::MemoryPool getMemoryPoolFromAllocDataForSplit(bool allocFound, const
|
||||
return NEO::MemoryPool::memoryNull;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamily<gfxCoreFamily>::isAppendSplitRemote(NEO::SvmAllocationData *allocData, void *ptr) const {
|
||||
auto driver = static_cast<DriverHandleImp *>(this->device->getDriverHandle());
|
||||
|
||||
if (allocData) {
|
||||
auto alloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
|
||||
return driver->isRemoteResourceNeeded(ptr, alloc, allocData, this->device);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamily<gfxCoreFamily>::isAppendSplitNeeded(void *dstPtr, const void *srcPtr, size_t size, NEO::TransferDirection &directionOut) {
|
||||
if (size < minimalSizeForBcsSplit) {
|
||||
@@ -3943,12 +3955,14 @@ bool CommandListCoreFamily<gfxCoreFamily>::isAppendSplitNeeded(void *dstPtr, con
|
||||
}
|
||||
}
|
||||
|
||||
return this->isAppendSplitNeeded(dstMemoryPool, srcMemoryPool, size, directionOut);
|
||||
bool remoteCopy = isAppendSplitRemote(srcAllocData, const_cast<void *>(srcPtr)) || isAppendSplitRemote(dstAllocData, dstPtr);
|
||||
|
||||
return this->isAppendSplitNeeded(dstMemoryPool, srcMemoryPool, size, directionOut, remoteCopy);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
inline bool CommandListCoreFamily<gfxCoreFamily>::isAppendSplitNeeded(NEO::MemoryPool dstPool, NEO::MemoryPool srcPool, size_t size, NEO::TransferDirection &directionOut) {
|
||||
directionOut = NEO::createTransferDirection(!NEO::MemoryPoolHelper::isSystemMemoryPool(srcPool), !NEO::MemoryPoolHelper::isSystemMemoryPool(dstPool));
|
||||
inline bool CommandListCoreFamily<gfxCoreFamily>::isAppendSplitNeeded(NEO::MemoryPool dstPool, NEO::MemoryPool srcPool, size_t size, NEO::TransferDirection &directionOut, bool remoteCopy) {
|
||||
directionOut = NEO::createTransferDirection(!NEO::MemoryPoolHelper::isSystemMemoryPool(srcPool), !NEO::MemoryPoolHelper::isSystemMemoryPool(dstPool), remoteCopy);
|
||||
|
||||
return this->isBcsSplitNeeded &&
|
||||
size >= minimalSizeForBcsSplit &&
|
||||
|
||||
@@ -803,7 +803,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
|
||||
ze_result_t ret;
|
||||
|
||||
NEO::TransferDirection direction;
|
||||
auto isSplitNeeded = this->isAppendSplitNeeded(dstAllocation->getMemoryPool(), srcAllocation->getMemoryPool(), size, direction);
|
||||
auto isSplitNeeded = this->isAppendSplitNeeded(dstAllocation->getMemoryPool(), srcAllocation->getMemoryPool(), size, direction, false);
|
||||
|
||||
bool relaxedOrdering = false;
|
||||
|
||||
|
||||
@@ -150,6 +150,7 @@ struct Device : _ze_device_handle_t {
|
||||
virtual ze_result_t getFabricVertex(ze_fabric_vertex_handle_t *phVertex) = 0;
|
||||
virtual uint32_t getEventMaxPacketCount() const = 0;
|
||||
virtual uint32_t getEventMaxKernelCount() const = 0;
|
||||
virtual void bcsSplitReleaseResources() = 0;
|
||||
NEO::TagAllocatorBase *getDeviceInOrderCounterAllocator();
|
||||
NEO::TagAllocatorBase *getHostInOrderCounterAllocator();
|
||||
NEO::TagAllocatorBase *getInOrderTimestampAllocator();
|
||||
|
||||
@@ -73,6 +73,10 @@ namespace L0 {
|
||||
|
||||
// The bcsSplit member is constructed with a reference to the device that owns it.
DeviceImp::DeviceImp() : bcsSplit(*this) {}
|
||||
|
||||
void DeviceImp::bcsSplitReleaseResources() {
|
||||
bcsSplit.releaseResources();
|
||||
}
|
||||
|
||||
// Accessor for the driver handle stored on this device.
DriverHandle *DeviceImp::getDriverHandle() {
    return driverHandle;
}
|
||||
|
||||
@@ -179,6 +179,7 @@ struct DeviceImp : public Device, NEO::NonCopyableAndNonMovableClass {
|
||||
NEO::EngineGroupType getInternalEngineGroupType();
|
||||
uint32_t getCopyEngineOrdinal() const;
|
||||
std::optional<uint32_t> tryGetCopyEngineOrdinal() const;
|
||||
void bcsSplitReleaseResources() override;
|
||||
|
||||
protected:
|
||||
ze_result_t queryPeerAccess(DeviceImp *peerDevice);
|
||||
|
||||
@@ -214,6 +214,11 @@ ze_result_t DriverHandleImp::getExtensionProperties(uint32_t *pCount,
|
||||
}
|
||||
|
||||
DriverHandleImp::~DriverHandleImp() {
|
||||
for (auto &device : this->devices) {
|
||||
// release temporary pointers before default context destruction
|
||||
device->bcsSplitReleaseResources();
|
||||
}
|
||||
|
||||
if (this->defaultContext) {
|
||||
L0::Context::fromHandle(this->defaultContext)->destroy();
|
||||
this->defaultContext = nullptr;
|
||||
|
||||
@@ -80,6 +80,7 @@ struct MockDevice : public Device {
|
||||
ADDMETHOD_NOBASE(getDebugProperties, ze_result_t, ZE_RESULT_SUCCESS, (zet_device_debug_properties_t * properties));
|
||||
ADDMETHOD_NOBASE(getDebugSession, DebugSession *, nullptr, (const zet_debug_config_t &config));
|
||||
ADDMETHOD_NOBASE_VOIDRETURN(removeDebugSession, ());
|
||||
ADDMETHOD_NOBASE_VOIDRETURN(bcsSplitReleaseResources, ());
|
||||
ADDMETHOD_NOBASE(obtainReusableAllocation, NEO::GraphicsAllocation *, nullptr, (size_t requiredSize, NEO::AllocationType type))
|
||||
ADDMETHOD_NOBASE_VOIDRETURN(storeReusableAllocation, (NEO::GraphicsAllocation & alloc));
|
||||
ADDMETHOD_NOBASE(getFabricVertex, ze_result_t, ZE_RESULT_SUCCESS, (ze_fabric_vertex_handle_t * phVertex));
|
||||
|
||||
@@ -936,28 +936,31 @@ struct AggregatedBcsSplitTests : public ::testing::Test {
|
||||
debugManager.flags.SplitBcsRequiredEnginesCount.set(expectedEnginesCount);
|
||||
debugManager.flags.SplitBcsMask.set(0b11110);
|
||||
|
||||
device = createDevice();
|
||||
createDevice();
|
||||
context = Context::fromHandle(driverHandle->getDefaultContext());
|
||||
cmdList = createCmdList();
|
||||
}
|
||||
|
||||
std::unique_ptr<L0::Device> createDevice() {
|
||||
ze_result_t returnValue;
|
||||
void createDevice() {
|
||||
auto hwInfo = *NEO::defaultHwInfo;
|
||||
hwInfo.featureTable.ftrBcsInfo = 0b111111111;
|
||||
hwInfo.capabilityTable.blitterOperationsSupported = true;
|
||||
auto neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo);
|
||||
auto neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0);
|
||||
|
||||
NEO::DeviceVector devices;
|
||||
devices.push_back(std::unique_ptr<NEO::Device>(neoDevice));
|
||||
|
||||
for (uint32_t i = 1; i < expectedNumRootDevices; i++) {
|
||||
auto neoRootDevice = NEO::MockDevice::createWithExecutionEnvironment<NEO::MockDevice>(&hwInfo, neoDevice->getExecutionEnvironment(), i);
|
||||
devices.push_back(std::unique_ptr<NEO::Device>(neoRootDevice));
|
||||
}
|
||||
|
||||
driverHandle = std::make_unique<Mock<L0::DriverHandleImp>>();
|
||||
driverHandle->initialize(std::move(devices));
|
||||
|
||||
auto device = std::unique_ptr<L0::Device>(L0::Device::create(driverHandle.get(), neoDevice, false, &returnValue));
|
||||
this->device = driverHandle->devices[0];
|
||||
|
||||
bcsSplit = &static_cast<DeviceImp *>(device.get())->bcsSplit;
|
||||
|
||||
return device;
|
||||
bcsSplit = &static_cast<DeviceImp *>(device)->bcsSplit;
|
||||
}
|
||||
|
||||
uint32_t queryCopyOrdinal() {
|
||||
@@ -987,7 +990,7 @@ struct AggregatedBcsSplitTests : public ::testing::Test {
|
||||
desc.ordinal = queryCopyOrdinal();
|
||||
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily,
|
||||
device.get(),
|
||||
device,
|
||||
&desc,
|
||||
false,
|
||||
NEO::EngineGroupType::copy,
|
||||
@@ -1004,16 +1007,25 @@ struct AggregatedBcsSplitTests : public ::testing::Test {
|
||||
return alloc;
|
||||
}
|
||||
|
||||
// Allocates `copySize` bytes of device memory on `device` through the fixture context,
// passing 4096u as the fourth argument (presumably the alignment - confirm against
// Context::allocDeviceMem). The call is expected to succeed; the resulting pointer is returned.
void *allocDeviceMem(L0::Device *device) {
    ze_device_mem_alloc_desc_t deviceDesc = {};
    void *deviceAlloc = nullptr;

    const ze_result_t status = context->allocDeviceMem(device->toHandle(), &deviceDesc, copySize, 4096u, &deviceAlloc);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    return deviceAlloc;
}
|
||||
|
||||
DebugManagerStateRestore restore;
|
||||
CmdListMemoryCopyParams copyParams = {};
|
||||
std::unique_ptr<Mock<L0::DriverHandleImp>> driverHandle;
|
||||
std::unique_ptr<L0::Device> device;
|
||||
L0::Device *device = nullptr;
|
||||
std::unique_ptr<L0::CommandList> cmdList;
|
||||
BcsSplit *bcsSplit = nullptr;
|
||||
Context *context = nullptr;
|
||||
const size_t copySize = 4 * MemoryConstants::megaByte;
|
||||
uint32_t expectedTileCount = 1;
|
||||
uint32_t expectedEnginesCount = 4;
|
||||
uint32_t expectedNumRootDevices = 1;
|
||||
};
|
||||
|
||||
HWTEST2_F(AggregatedBcsSplitTests, whenObtainCalledThenAggregatedEventsCreated, IsAtLeastXeHpcCore) {
|
||||
@@ -1137,6 +1149,31 @@ HWTEST2_F(AggregatedBcsSplitTests, givenMarkerEventWhenCheckingCompletionThenRes
|
||||
context->freeMem(ptr);
|
||||
}
|
||||
|
||||
struct MultiRootAggregatedBcsSplitTests : public AggregatedBcsSplitTests {
|
||||
void SetUp() override {
|
||||
expectedNumRootDevices = 2;
|
||||
debugManager.flags.CreateMultipleRootDevices.set(expectedNumRootDevices);
|
||||
AggregatedBcsSplitTests::SetUp();
|
||||
}
|
||||
};
|
||||
|
||||
// Copies between host memory and device memory allocated on the second root device, in both
// directions, and checks after each copy that the command list counter matches the base
// signal value of the corresponding BCS split marker event.
HWTEST2_F(MultiRootAggregatedBcsSplitTests, givenRemoteAllocWhenCopyRequestedThenEnableSplit, IsAtLeastXeHpcCore) {
    using ImmediateCmdListType = WhiteBox<L0::CommandListCoreFamilyImmediate<FamilyType::gfxCoreFamily>>;

    // The device allocation lives on devices[1], while the fixture command list uses devices[0].
    auto remoteDevice = driverHandle->devices[1];

    auto hostPtr = allocHostMem();
    auto remotePtr = allocDeviceMem(remoteDevice);
    auto immCmdList = static_cast<ImmediateCmdListType *>(cmdList.get());

    // Host memory as source, remote device memory as destination.
    immCmdList->appendMemoryCopy(remotePtr, hostPtr, copySize, nullptr, 0, nullptr, copyParams);
    EXPECT_EQ(immCmdList->inOrderExecInfo->getCounterValue(), bcsSplit->events.marker[0]->getInOrderExecBaseSignalValue());

    // Remote device memory as source, host memory as destination.
    immCmdList->appendMemoryCopy(hostPtr, remotePtr, copySize, nullptr, 0, nullptr, copyParams);
    EXPECT_EQ(immCmdList->inOrderExecInfo->getCounterValue(), bcsSplit->events.marker[1]->getInOrderExecBaseSignalValue());

    context->freeMem(hostPtr);
    context->freeMem(remotePtr);
}
|
||||
|
||||
struct MultiTileAggregatedBcsSplitTests : public AggregatedBcsSplitTests {
|
||||
void SetUp() override {
|
||||
expectedTileCount = 2;
|
||||
|
||||
@@ -45,7 +45,7 @@ struct CsrSelectionArgs {
|
||||
if (dst) {
|
||||
processResource(*dst, rootDeviceIndex, this->dstResource);
|
||||
}
|
||||
this->direction = createTransferDirection(srcResource.isLocal, dstResource.isLocal);
|
||||
this->direction = createTransferDirection(srcResource.isLocal, dstResource.isLocal, false);
|
||||
}
|
||||
|
||||
CsrSelectionArgs(cl_command_type cmdType, Image *src, Image *dst, uint32_t rootDeviceIndex, const size_t *size, const size_t *srcOrigin, const size_t *dstOrigin)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Intel Corporation
|
||||
* Copyright (C) 2023-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -13,9 +13,14 @@ enum class TransferDirection {
|
||||
hostToLocal,
|
||||
localToHost,
|
||||
localToLocal,
|
||||
remote,
|
||||
};
|
||||
|
||||
inline TransferDirection createTransferDirection(bool srcLocal, bool dstLocal) {
|
||||
inline TransferDirection createTransferDirection(bool srcLocal, bool dstLocal, bool remoteCopy) {
|
||||
if (remoteCopy) {
|
||||
return TransferDirection::remote;
|
||||
}
|
||||
|
||||
if (srcLocal) {
|
||||
if (dstLocal) {
|
||||
return TransferDirection::localToLocal;
|
||||
|
||||
Reference in New Issue
Block a user