Add event management to L0 bcs split

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2022-09-12 12:00:04 +00:00
committed by Compute-Runtime-Automation
parent 0fb52f7d45
commit 28e6504d41
6 changed files with 222 additions and 9 deletions

View File

@@ -157,6 +157,7 @@ struct CommandListCoreFamily : CommandListImp {
void appendMultiPartitionPrologue(uint32_t partitionDataSize) override;
void appendMultiPartitionEpilogue() override;
void appendEventForProfilingAllWalkers(Event *event, bool beforeWalker);
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
ze_result_t reserveSpace(size_t size, void **ptr) override;
ze_result_t reset() override;
@@ -259,7 +260,6 @@ struct CommandListCoreFamily : CommandListImp {
size_t estimateBufferSizeMultiTileBarrier(const NEO::HardwareInfo &hwInfo);
uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region);
MOCKABLE_VIRTUAL AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
void addFlushRequiredCommand(bool flushOperationRequired, Event *signalEvent);
virtual void createLogicalStateHelper();

View File

@@ -60,4 +60,69 @@ void BcsSplit::releaseResources() {
cmdQ->destroy();
}
}
// Destroys every event tracked by this object and then the pools themselves.
// Events are released first (markers, then subcopy events) and the pools last.
BcsSplit::Events::~Events() {
    const auto destroyAll = [](auto &objects) {
        for (auto *object : objects) {
            object->destroy();
        }
    };

    destroyAll(this->marker);
    destroyAll(this->subcopy);
    destroyAll(this->pools);
}
// Returns the index of a marker event that can be used for the next split copy.
//
// The first marker whose queryStatus() reports ZE_RESULT_SUCCESS is reset together
// with its group of subcopy events (one per split command queue) and its index is
// returned. When no such marker exists, a fresh group is created via allocateNew().
//
// context             - context forwarded to allocateNew() when new events are needed
// maxEventCountInPool - pool capacity forwarded to allocateNew()
size_t BcsSplit::Events::obtainForSplit(Context *context, size_t maxEventCountInPool) {
    const auto markerCount = this->marker.size();
    for (size_t markerIndex = 0; markerIndex < markerCount; markerIndex++) {
        auto *markerEvent = this->marker[markerIndex];
        if (markerEvent->queryStatus() != ZE_RESULT_SUCCESS) {
            continue;
        }

        markerEvent->reset();

        // Subcopy events are stored contiguously: one per command queue for each marker.
        const auto queueCount = this->bcsSplit.cmdQs.size();
        const auto firstSubcopyIndex = markerIndex * queueCount;
        for (size_t offset = 0; offset < queueCount; offset++) {
            this->subcopy[firstSubcopyIndex + offset]->reset();
        }
        return markerIndex;
    }

    return this->allocateNew(context, maxEventCountInPool);
}
// Creates one group of events for a split copy: one subcopy event per split command
// queue followed by a single marker event, and returns the index of the new marker.
//
// A new event pool of maxEventCountInPool entries is created when no pool exists yet
// or when the most recent pool cannot hold a complete group.
//
// context             - context the event pool is created in
// maxEventCountInPool - number of events each newly created pool can hold
//
// NOTE(review): the results of EventPool::create and createEvent are not checked here;
// confirm that callers can tolerate a failed pool/event creation.
size_t BcsSplit::Events::allocateNew(Context *context, size_t maxEventCountInPool) {
    const size_t eventsPerSplit = this->bcsSplit.cmdQs.size() + 1;

    const bool needsNewPool = this->pools.empty() ||
                              (this->createdFromLatestPool + eventsPerSplit > maxEventCountInPool);
    if (needsNewPool) {
        ze_event_pool_desc_t poolDesc{};
        poolDesc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC;
        poolDesc.count = static_cast<uint32_t>(maxEventCountInPool);

        ze_result_t result;
        auto hDevice = this->bcsSplit.device.toHandle();
        auto newPool = EventPool::create(this->bcsSplit.device.getDriverHandle(), context, 1, &hDevice, &poolDesc, result);
        this->pools.push_back(newPool);
        this->createdFromLatestPool = 0u;
    }

    auto currentPool = this->pools.back();

    ze_event_desc_t eventDesc{};
    eventDesc.stype = ZE_STRUCTURE_TYPE_EVENT_DESC;
    for (size_t eventIndex = 0; eventIndex < eventsPerSplit; eventIndex++) {
        // The last event of the group is the marker; it is the only one signaled with host scope.
        const bool isMarker = (eventIndex == eventsPerSplit - 1);

        eventDesc.index = static_cast<uint32_t>(this->createdFromLatestPool++);
        eventDesc.signal = isMarker ? ZE_EVENT_SCOPE_FLAG_HOST : ZE_EVENT_SCOPE_FLAG_DEVICE;

        ze_event_handle_t hEvent;
        currentPool->createEvent(&eventDesc, &hEvent);

        auto &destination = isMarker ? this->marker : this->subcopy;
        destination.push_back(Event::fromHandle(hEvent));
    }

    return this->marker.size() - 1;
}
} // namespace L0

View File

@@ -12,6 +12,7 @@
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
#include "level_zero/core/source/cmdqueue/cmdqueue.h"
#include "level_zero/core/source/context/context.h"
#include "level_zero/core/source/event/event.h"
#include <functional>
@@ -28,6 +29,21 @@ struct DeviceImp;
struct BcsSplit {
DeviceImp &device;
// Bookkeeping for the events used by split copies: the pools they come from,
// the per-queue subcopy events and the per-split marker events.
struct Events {
// Owning BcsSplit; its cmdQs and device are used when events are allocated.
BcsSplit &bcsSplit;
// Event pools created so far; new events are taken from the last one.
std::vector<EventPool *> pools;
// Subcopy events, cmdQs.size() per marker, stored at [markerIndex * cmdQs.size() + queueIndex].
std::vector<Event *> subcopy;
// One marker event per event group; a signaled marker allows the group to be reused.
std::vector<Event *> marker;
// Number of events already created from the most recently created pool.
size_t createdFromLatestPool = 0u;
// Returns the index of a reusable (signaled) marker, or of a newly allocated one.
size_t obtainForSplit(Context *context, size_t maxEventCountInPool);
// Creates a new group of subcopy events plus a marker and returns the marker index.
size_t allocateNew(Context *context, size_t maxEventCountInPool);
Events(BcsSplit &bcsSplit) : bcsSplit(bcsSplit){};
// Destroys all tracked events and pools.
~Events();
} events;
std::vector<CommandQueue *> cmdQs;
NEO::BcsInfoMask engines = NEO::EngineHelpers::oddLinkedCopyEnginesMask;
@@ -38,10 +54,16 @@ struct BcsSplit {
size_t size,
ze_event_handle_t hSignalEvent,
std::function<ze_result_t(void *, const void *, size_t, ze_event_handle_t)> appendCall) {
ze_result_t result = ZE_RESULT_SUCCESS;
if (hSignalEvent) {
cmdList->appendEventForProfilingAllWalkers(Event::fromHandle(hSignalEvent), true);
}
auto markerEventIndex = this->events.obtainForSplit(Context::fromHandle(cmdList->hContext), MemoryConstants::pageSize64k / sizeof(typename CommandListCoreFamilyImmediate<gfxCoreFamily>::GfxFamily::TimestampPacketType));
auto subcopyEventIndex = markerEventIndex * this->cmdQs.size();
StackVec<ze_event_handle_t, 4> eventHandles;
auto totalSize = size;
auto engineCount = this->cmdQs.size();
for (size_t i = 0; i < this->cmdQs.size(); i++) {
@@ -49,24 +71,29 @@ struct BcsSplit {
auto localDstPtr = ptrOffset(dstptr, size - totalSize);
auto localSrcPtr = ptrOffset(srcptr, size - totalSize);
appendCall(localDstPtr, localSrcPtr, localSize, nullptr);
auto eventHandle = this->events.subcopy[subcopyEventIndex + i]->toHandle();
result = appendCall(localDstPtr, localSrcPtr, localSize, eventHandle);
cmdList->executeCommandListImmediateImpl(true, this->cmdQs[i]);
eventHandles.push_back(eventHandle);
totalSize -= localSize;
engineCount--;
}
cmdList->addEventsToCmdList(static_cast<uint32_t>(this->cmdQs.size()), eventHandles.data());
cmdList->appendSignalEvent(this->events.marker[markerEventIndex]->toHandle());
if (hSignalEvent) {
cmdList->appendEventForProfilingAllWalkers(Event::fromHandle(hSignalEvent), false);
}
return ZE_RESULT_SUCCESS;
return result;
}
bool setupDevice(uint32_t productFamily, bool internalUsage, const ze_command_queue_desc_t *desc, NEO::CommandStreamReceiver *csr);
void releaseResources();
BcsSplit(DeviceImp &device) : device(device){};
BcsSplit(DeviceImp &device) : device(device), events(*this){};
};
} // namespace L0

View File

@@ -101,7 +101,11 @@ struct Mock<L0::DeviceImp> : public L0::DeviceImp {
using Base = L0::DeviceImp;
using Base::adjustCommandQueueDesc;
using Base::debugSession;
using Base::getNEODevice;
using Base::implicitScalingCapable;
using Base::neoDevice;
Mock() = default;
explicit Mock(NEO::Device *device, NEO::ExecutionEnvironment *execEnv) {
device->incRefInternal();

View File

@@ -1359,8 +1359,9 @@ HWTEST2_F(CommandListCreate, givenNonEmptyCommandsToPatchWhenClearCommandsToPatc
}
template <NEO::AllocationType AllocType>
class MyDeviceMock : public Mock<Device> {
class MyDeviceMock : public Mock<DeviceImp> {
public:
using Mock<L0::DeviceImp>::Mock;
NEO::GraphicsAllocation *allocateMemoryFromHostPtr(const void *buffer, size_t size, bool hostCopyAllowed) override {
auto alloc = std::make_unique<NEO::MockGraphicsAllocation>(const_cast<void *>(buffer), reinterpret_cast<uintptr_t>(buffer), size);
alloc->allocationType = AllocType;
@@ -1372,7 +1373,7 @@ class MyDeviceMock : public Mock<Device> {
};
HWTEST2_F(CommandListCreate, givenHostPtrAllocAllocWhenInternalMemCreatedThenNewAllocAddedToDealocationContainer, IsAtLeastSkl) {
auto myDevice = std::make_unique<MyDeviceMock<NEO::AllocationType::INTERNAL_HOST_MEMORY>>();
auto myDevice = std::make_unique<MyDeviceMock<NEO::AllocationType::INTERNAL_HOST_MEMORY>>(device->getNEODevice(), execEnv);
myDevice->neoDevice = device->getNEODevice();
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(myDevice.get(), NEO::EngineGroupType::Copy, 0u);
@@ -1387,7 +1388,7 @@ HWTEST2_F(CommandListCreate, givenHostPtrAllocAllocWhenInternalMemCreatedThenNew
}
HWTEST2_F(CommandListCreate, givenHostPtrAllocAllocWhenExternalMemCreatedThenNewAllocAddedToHostPtrMap, IsAtLeastSkl) {
auto myDevice = std::make_unique<MyDeviceMock<NEO::AllocationType::EXTERNAL_HOST_PTR>>();
auto myDevice = std::make_unique<MyDeviceMock<NEO::AllocationType::EXTERNAL_HOST_PTR>>(device->getNEODevice(), execEnv);
myDevice->neoDevice = device->getNEODevice();
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(myDevice.get(), NEO::EngineGroupType::Copy, 0u);
@@ -1402,7 +1403,7 @@ HWTEST2_F(CommandListCreate, givenHostPtrAllocAllocWhenExternalMemCreatedThenNew
}
HWTEST2_F(CommandListCreate, givenGetAlignedAllocationWhenInternalMemWithinDifferentAllocThenReturnNewAlloc, IsAtLeastSkl) {
auto myDevice = std::make_unique<MyDeviceMock<NEO::AllocationType::INTERNAL_HOST_MEMORY>>();
auto myDevice = std::make_unique<MyDeviceMock<NEO::AllocationType::INTERNAL_HOST_MEMORY>>(device->getNEODevice(), execEnv);
myDevice->neoDevice = device->getNEODevice();
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(myDevice.get(), NEO::EngineGroupType::Copy, 0u);
@@ -1416,7 +1417,7 @@ HWTEST2_F(CommandListCreate, givenGetAlignedAllocationWhenInternalMemWithinDiffe
commandList->commandContainer.getDeallocationContainer().clear();
}
HWTEST2_F(CommandListCreate, givenGetAlignedAllocationWhenExternalMemWithinDifferentAllocThenReturnPreviouslyAllocatedMem, IsAtLeastSkl) {
auto myDevice = std::make_unique<MyDeviceMock<NEO::AllocationType::EXTERNAL_HOST_PTR>>();
auto myDevice = std::make_unique<MyDeviceMock<NEO::AllocationType::EXTERNAL_HOST_PTR>>(device->getNEODevice(), execEnv);
myDevice->neoDevice = device->getNEODevice();
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(myDevice.get(), NEO::EngineGroupType::Copy, 0u);

View File

@@ -362,5 +362,121 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe
EXPECT_NE(cmdList.end(), itor);
}
// Verifies that allocateNew() adds one marker plus one subcopy event per split queue
// on every call and opens a new pool once the current one cannot fit a full group.
HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhenAllocateNewEventsForSplitThenEventsAreManagedProperly, IsXeHpcCore) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    DebugManagerStateRestore restorer;
    DebugManager.flags.SplitBcsCopy.set(1);

    ze_result_t returnValue;
    auto hwInfo = *NEO::defaultHwInfo;
    hwInfo.featureTable.ftrBcsInfo = 0b111111111;
    hwInfo.capabilityTable.blitterOperationsSupported = true;

    auto testNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo);
    auto testL0Device = std::unique_ptr<L0::Device>(L0::Device::create(driverHandle.get(), testNeoDevice, false, &returnValue));

    ze_command_queue_desc_t desc = {};
    desc.ordinal = static_cast<uint32_t>(testNeoDevice->getEngineGroupIndexFromEngineGroupType(NEO::EngineGroupType::Copy));

    std::unique_ptr<L0::CommandList> commandList0(CommandList::createImmediate(productFamily,
                                                                               testL0Device.get(),
                                                                               &desc,
                                                                               false,
                                                                               NEO::EngineGroupType::Copy,
                                                                               returnValue));
    ASSERT_NE(nullptr, commandList0);

    auto &bcsSplit = static_cast<DeviceImp *>(testL0Device.get())->bcsSplit;
    auto &events = bcsSplit.events;
    auto *context = Context::fromHandle(commandList0->hContext);

    // Four split queues are expected, none of which has submitted anything yet.
    EXPECT_EQ(bcsSplit.cmdQs.size(), 4u);
    for (size_t queueIndex = 0u; queueIndex < 4u; queueIndex++) {
        EXPECT_EQ(static_cast<CommandQueueImp *>(bcsSplit.cmdQs[queueIndex])->getTaskCount(), 0u);
    }

    // Nothing is allocated up front.
    EXPECT_EQ(events.pools.size(), 0u);
    EXPECT_EQ(events.marker.size(), 0u);
    EXPECT_EQ(events.subcopy.size(), 0u);
    EXPECT_EQ(events.createdFromLatestPool, 0u);

    // First group: a pool of 10 is created and 5 of its events are used.
    events.allocateNew(context, 10);
    EXPECT_EQ(events.pools.size(), 1u);
    EXPECT_EQ(events.marker.size(), 1u);
    EXPECT_EQ(events.subcopy.size(), 4u);
    EXPECT_EQ(events.createdFromLatestPool, 5u);

    // Second group still fits into the same pool and fills it completely.
    events.allocateNew(context, 10);
    EXPECT_EQ(events.pools.size(), 1u);
    EXPECT_EQ(events.marker.size(), 2u);
    EXPECT_EQ(events.subcopy.size(), 8u);
    EXPECT_EQ(events.createdFromLatestPool, 10u);

    // Third group does not fit, so a second pool is opened.
    events.allocateNew(context, 10);
    EXPECT_EQ(events.pools.size(), 2u);
    EXPECT_EQ(events.marker.size(), 3u);
    EXPECT_EQ(events.subcopy.size(), 12u);
    EXPECT_EQ(events.createdFromLatestPool, 5u);
}
// Verifies that obtainForSplit() allocates a new event group while no marker is
// signaled and hands back an existing group once its marker has been signaled.
HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhenObtainEventsForSplitThenReuseEventsIfMarkerIsSignaled, IsXeHpcCore) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    DebugManagerStateRestore restorer;
    DebugManager.flags.SplitBcsCopy.set(1);

    ze_result_t returnValue;
    auto hwInfo = *NEO::defaultHwInfo;
    hwInfo.featureTable.ftrBcsInfo = 0b111111111;
    hwInfo.capabilityTable.blitterOperationsSupported = true;

    auto testNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo);
    auto testL0Device = std::unique_ptr<L0::Device>(L0::Device::create(driverHandle.get(), testNeoDevice, false, &returnValue));

    ze_command_queue_desc_t desc = {};
    desc.ordinal = static_cast<uint32_t>(testNeoDevice->getEngineGroupIndexFromEngineGroupType(NEO::EngineGroupType::Copy));

    std::unique_ptr<L0::CommandList> commandList0(CommandList::createImmediate(productFamily,
                                                                               testL0Device.get(),
                                                                               &desc,
                                                                               false,
                                                                               NEO::EngineGroupType::Copy,
                                                                               returnValue));
    ASSERT_NE(nullptr, commandList0);

    auto &bcsSplit = static_cast<DeviceImp *>(testL0Device.get())->bcsSplit;
    auto &events = bcsSplit.events;
    auto *context = Context::fromHandle(commandList0->hContext);

    // Four split queues are expected, none of which has submitted anything yet.
    EXPECT_EQ(bcsSplit.cmdQs.size(), 4u);
    for (size_t queueIndex = 0u; queueIndex < 4u; queueIndex++) {
        EXPECT_EQ(static_cast<CommandQueueImp *>(bcsSplit.cmdQs[queueIndex])->getTaskCount(), 0u);
    }

    // Nothing is allocated up front.
    EXPECT_EQ(events.pools.size(), 0u);
    EXPECT_EQ(events.marker.size(), 0u);
    EXPECT_EQ(events.subcopy.size(), 0u);
    EXPECT_EQ(events.createdFromLatestPool, 0u);

    // Seed the container with a single, unsignaled event group.
    events.allocateNew(context, 10);
    EXPECT_EQ(events.pools.size(), 1u);
    EXPECT_EQ(events.marker.size(), 1u);
    EXPECT_EQ(events.subcopy.size(), 4u);
    EXPECT_EQ(events.createdFromLatestPool, 5u);

    // No marker is signaled, so a second group has to be allocated.
    auto ret = events.obtainForSplit(context, 10);
    EXPECT_EQ(ret, 1u);
    EXPECT_EQ(events.pools.size(), 1u);
    EXPECT_EQ(events.marker.size(), 2u);
    EXPECT_EQ(events.subcopy.size(), 8u);
    EXPECT_EQ(events.createdFromLatestPool, 10u);

    // Once marker 1 is signaled its group is reused and nothing new is created.
    events.marker[1]->hostSignal();
    ret = events.obtainForSplit(context, 10);
    EXPECT_EQ(ret, 1u);
    EXPECT_EQ(events.pools.size(), 1u);
    EXPECT_EQ(events.marker.size(), 2u);
    EXPECT_EQ(events.subcopy.size(), 8u);
    EXPECT_EQ(events.createdFromLatestPool, 10u);
}
} // namespace ult
} // namespace L0