mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-10 15:12:56 +08:00
Add initial BCS split implementation for L0
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
fad4bee432
commit
63e72965a1
@@ -293,6 +293,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
bool isTbxMode = false;
|
||||
bool commandListSLMEnabled = false;
|
||||
bool requiresQueueUncachedMocs = false;
|
||||
bool isBcsSplitNeeded = false;
|
||||
|
||||
protected:
|
||||
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);
|
||||
|
||||
@@ -156,6 +156,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
||||
void appendMultiPartitionPrologue(uint32_t partitionDataSize) override;
|
||||
void appendMultiPartitionEpilogue() override;
|
||||
void appendEventForProfilingAllWalkers(Event *event, bool beforeWalker);
|
||||
|
||||
ze_result_t reserveSpace(size_t size, void **ptr) override;
|
||||
ze_result_t reset() override;
|
||||
@@ -249,7 +250,6 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
void appendWriteKernelTimestamp(Event *event, bool beforeWalker, bool maskLsb, bool workloadPartition);
|
||||
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition);
|
||||
void appendEventForProfiling(Event *event, bool beforeWalker, bool workloadPartition);
|
||||
void appendEventForProfilingAllWalkers(Event *event, bool beforeWalker);
|
||||
void appendEventForProfilingCopyCommand(Event *event, bool beforeWalker);
|
||||
void appendSignalEventPostWalker(Event *event, bool workloadPartition);
|
||||
virtual void programStateBaseAddress(NEO::CommandContainer &container, bool genericMediaStateClearRequired);
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "shared/source/memory_manager/prefetch_manager.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
|
||||
#include "level_zero/core/source/device/bcs_split.h"
|
||||
|
||||
namespace L0 {
|
||||
|
||||
@@ -215,8 +216,18 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
checkAvailableSpace();
|
||||
}
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent,
|
||||
numWaitEvents, phWaitEvents);
|
||||
|
||||
ze_result_t ret;
|
||||
|
||||
if (this->isBcsSplitNeeded) {
|
||||
ret = static_cast<DeviceImp *>(this->device)->bcsSplit.appendSplitCall(this, dstptr, srcptr, size, hSignalEvent, [&](void *dstptrParam, const void *srcptrParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
|
||||
return CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptrParam, srcptrParam, sizeParam, hSignalEventParam, numWaitEvents, phWaitEvents);
|
||||
});
|
||||
} else {
|
||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent,
|
||||
numWaitEvents, phWaitEvents);
|
||||
}
|
||||
|
||||
return flushImmediate(ret, true);
|
||||
}
|
||||
|
||||
|
||||
@@ -148,7 +148,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
|
||||
commandList->isTbxMode = (csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX) || (csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX_WITH_AUB);
|
||||
commandList->commandListPreemptionMode = device->getDevicePreemptionMode();
|
||||
|
||||
deviceImp->bcsSplit.setupDevice(productFamily, internalUsage, desc, csr);
|
||||
commandList->isBcsSplitNeeded = deviceImp->bcsSplit.setupDevice(productFamily, internalUsage, desc, csr);
|
||||
|
||||
return commandList;
|
||||
}
|
||||
|
||||
@@ -15,17 +15,20 @@
|
||||
|
||||
namespace L0 {
|
||||
|
||||
void BcsSplit::setupDevice(uint32_t productFamily, bool internalUsage, const ze_command_queue_desc_t *desc, NEO::CommandStreamReceiver *csr) {
|
||||
bool BcsSplit::setupDevice(uint32_t productFamily, bool internalUsage, const ze_command_queue_desc_t *desc, NEO::CommandStreamReceiver *csr) {
|
||||
auto initializeBcsSplit = this->device.getNEODevice()->isBcsSplitSupported() &&
|
||||
csr->getOsContext().getEngineType() == aub_stream::EngineType::ENGINE_BCS &&
|
||||
!internalUsage;
|
||||
|
||||
if (!initializeBcsSplit) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static std::mutex bcsSplitInitMutex;
|
||||
std::lock_guard<std::mutex> lock(bcsSplitInitMutex);
|
||||
|
||||
auto initializeBcsSplit = this->device.getNEODevice()->isBcsSplitSupported() &&
|
||||
csr->getOsContext().getEngineType() == aub_stream::EngineType::ENGINE_BCS &&
|
||||
!internalUsage &&
|
||||
this->cmdQs.empty();
|
||||
|
||||
if (!initializeBcsSplit) {
|
||||
return;
|
||||
if (!this->cmdQs.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (NEO::DebugManager.flags.SplitBcsMask.get() > 0) {
|
||||
@@ -48,6 +51,8 @@ void BcsSplit::setupDevice(uint32_t productFamily, bool internalUsage, const ze_
|
||||
this->cmdQs.push_back(commandQueue);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void BcsSplit::releaseResources() {
|
||||
|
||||
@@ -10,8 +10,11 @@
|
||||
#include "shared/source/helpers/engine_node_helper.h"
|
||||
#include "shared/source/sku_info/sku_info_base.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
|
||||
#include "level_zero/core/source/cmdqueue/cmdqueue.h"
|
||||
#include "level_zero/core/source/event/event.h"
|
||||
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
namespace NEO {
|
||||
@@ -28,7 +31,39 @@ struct BcsSplit {
|
||||
std::vector<CommandQueue *> cmdQs;
|
||||
NEO::BcsInfoMask engines = NEO::EngineHelpers::oddLinkedCopyEnginesMask;
|
||||
|
||||
void setupDevice(uint32_t productFamily, bool internalUsage, const ze_command_queue_desc_t *desc, NEO::CommandStreamReceiver *csr);
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t appendSplitCall(CommandListCoreFamilyImmediate<gfxCoreFamily> *cmdList,
|
||||
void *dstptr,
|
||||
const void *srcptr,
|
||||
size_t size,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
std::function<ze_result_t(void *, const void *, size_t, ze_event_handle_t)> appendCall) {
|
||||
if (hSignalEvent) {
|
||||
cmdList->appendEventForProfilingAllWalkers(Event::fromHandle(hSignalEvent), true);
|
||||
}
|
||||
|
||||
auto totalSize = size;
|
||||
auto engineCount = this->cmdQs.size();
|
||||
for (size_t i = 0; i < this->cmdQs.size(); i++) {
|
||||
auto localSize = totalSize / engineCount;
|
||||
auto localDstPtr = ptrOffset(dstptr, size - totalSize);
|
||||
auto localSrcPtr = ptrOffset(srcptr, size - totalSize);
|
||||
|
||||
appendCall(localDstPtr, localSrcPtr, localSize, nullptr);
|
||||
cmdList->executeCommandListImmediateImpl(true, this->cmdQs[i]);
|
||||
|
||||
totalSize -= localSize;
|
||||
engineCount--;
|
||||
}
|
||||
|
||||
if (hSignalEvent) {
|
||||
cmdList->appendEventForProfilingAllWalkers(Event::fromHandle(hSignalEvent), false);
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
bool setupDevice(uint32_t productFamily, bool internalUsage, const ze_command_queue_desc_t *desc, NEO::CommandStreamReceiver *csr);
|
||||
void releaseResources();
|
||||
|
||||
BcsSplit(DeviceImp &device) : device(device){};
|
||||
|
||||
@@ -264,5 +264,103 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopySetZeroWhenCreateImmediate
|
||||
EXPECT_EQ(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs.size(), 0u);
|
||||
}
|
||||
|
||||
// Creates an immediate copy command list with SplitBcsCopy enabled, expects four BCS split
// queues with task count 0, appends an 8-byte memory copy and expects every split queue to
// report task count 1 afterwards.
HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhenAppendingMemoryCopyThenSuccessIsReturned, IsXeHpcCore) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.SplitBcsCopy.set(1);

    ze_result_t returnValue;
    auto hwInfo = *NEO::defaultHwInfo;
    hwInfo.featureTable.ftrBcsInfo = 0b111111111;
    hwInfo.capabilityTable.blitterOperationsSupported = true;
    auto testNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo);
    auto testL0Device = std::unique_ptr<L0::Device>(L0::Device::create(driverHandle.get(), testNeoDevice, false, &returnValue));

    ze_command_queue_desc_t desc = {};
    desc.ordinal = static_cast<uint32_t>(testNeoDevice->getEngineGroupIndexFromEngineGroupType(NEO::EngineGroupType::Copy));

    std::unique_ptr<L0::CommandList> commandList0(CommandList::createImmediate(productFamily,
                                                                               testL0Device.get(),
                                                                               &desc,
                                                                               false,
                                                                               NEO::EngineGroupType::Copy,
                                                                               returnValue));
    ASSERT_NE(nullptr, commandList0);

    // The split queues live on the device, not on the command list.
    const auto &splitQueues = static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs;
    constexpr size_t expectedSplitQueueCount = 4u;
    EXPECT_EQ(splitQueues.size(), expectedSplitQueueCount);
    for (size_t i = 0; i < expectedSplitQueueCount; i++) {
        EXPECT_EQ(static_cast<CommandQueueImp *>(splitQueues[i])->getTaskCount(), 0u);
    }

    void *srcPtr = reinterpret_cast<void *>(0x1234);
    void *dstPtr = reinterpret_cast<void *>(0x2345);

    auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // Every split queue must have received exactly one submission.
    for (size_t i = 0; i < expectedSplitQueueCount; i++) {
        EXPECT_EQ(static_cast<CommandQueueImp *>(splitQueues[i])->getTaskCount(), 1u);
    }
}
|
||||
|
||||
// Same scenario as the event-less split copy test, but the copy carries a signal event:
// every split queue is expected to go from task count 0 to 1, and the immediate command
// list's own command stream is expected to contain an MI_FLUSH_DW command.
HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhenAppendingMemoryCopyWithEventThenSuccessIsReturnedAndMiFlushProgrammed, IsXeHpcCore) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    DebugManagerStateRestore restorer;
    DebugManager.flags.SplitBcsCopy.set(1);

    ze_result_t returnValue;
    auto hwInfo = *NEO::defaultHwInfo;
    hwInfo.featureTable.ftrBcsInfo = 0b111111111;
    hwInfo.capabilityTable.blitterOperationsSupported = true;
    auto testNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo);
    std::unique_ptr<L0::Device> testL0Device(L0::Device::create(driverHandle.get(), testNeoDevice, false, &returnValue));

    ze_command_queue_desc_t desc = {};
    desc.ordinal = static_cast<uint32_t>(testNeoDevice->getEngineGroupIndexFromEngineGroupType(NEO::EngineGroupType::Copy));

    std::unique_ptr<L0::CommandList> commandList0(CommandList::createImmediate(productFamily,
                                                                               testL0Device.get(),
                                                                               &desc,
                                                                               false,
                                                                               NEO::EngineGroupType::Copy,
                                                                               returnValue));
    ASSERT_NE(nullptr, commandList0);

    // Split queues are owned by the device; check them through one reference.
    const auto &splitQueues = static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.cmdQs;
    constexpr size_t expectedSplitQueueCount = 4u;
    EXPECT_EQ(splitQueues.size(), expectedSplitQueueCount);
    for (size_t i = 0; i < expectedSplitQueueCount; i++) {
        EXPECT_EQ(static_cast<CommandQueueImp *>(splitQueues[i])->getTaskCount(), 0u);
    }

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
    eventPoolDesc.count = 1;

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.wait = 0;
    eventDesc.signal = 0;

    std::unique_ptr<EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    std::unique_ptr<Event> event(Event::create<uint32_t>(eventPool.get(), &eventDesc, device));

    void *srcPtr = reinterpret_cast<void *>(0x1234);
    void *dstPtr = reinterpret_cast<void *>(0x2345);

    auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, 8, event->toHandle(), 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    for (size_t i = 0; i < expectedSplitQueueCount; i++) {
        EXPECT_EQ(static_cast<CommandQueueImp *>(splitQueues[i])->getTaskCount(), 1u);
    }

    // Parse what was recorded in the immediate list's command stream and look for MI_FLUSH_DW.
    auto *commandStream = commandList0->commandContainer.getCommandStream();
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, commandStream->getCpuBase(), commandStream->getUsed()));

    auto itor = find<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), itor);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
Reference in New Issue
Block a user