Add initial BCS split implementation for L0

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2022-09-09 09:43:47 +00:00
committed by Compute-Runtime-Automation
parent fad4bee432
commit 63e72965a1
7 changed files with 163 additions and 13 deletions

View File

@@ -15,17 +15,20 @@
namespace L0 {
void BcsSplit::setupDevice(uint32_t productFamily, bool internalUsage, const ze_command_queue_desc_t *desc, NEO::CommandStreamReceiver *csr) {
bool BcsSplit::setupDevice(uint32_t productFamily, bool internalUsage, const ze_command_queue_desc_t *desc, NEO::CommandStreamReceiver *csr) {
auto initializeBcsSplit = this->device.getNEODevice()->isBcsSplitSupported() &&
csr->getOsContext().getEngineType() == aub_stream::EngineType::ENGINE_BCS &&
!internalUsage;
if (!initializeBcsSplit) {
return false;
}
static std::mutex bcsSplitInitMutex;
std::lock_guard<std::mutex> lock(bcsSplitInitMutex);
auto initializeBcsSplit = this->device.getNEODevice()->isBcsSplitSupported() &&
csr->getOsContext().getEngineType() == aub_stream::EngineType::ENGINE_BCS &&
!internalUsage &&
this->cmdQs.empty();
if (!initializeBcsSplit) {
return;
if (!this->cmdQs.empty()) {
return true;
}
if (NEO::DebugManager.flags.SplitBcsMask.get() > 0) {
@@ -48,6 +51,8 @@ void BcsSplit::setupDevice(uint32_t productFamily, bool internalUsage, const ze_
this->cmdQs.push_back(commandQueue);
}
}
return true;
}
void BcsSplit::releaseResources() {

View File

@@ -10,8 +10,11 @@
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/sku_info/sku_info_base.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
#include "level_zero/core/source/cmdqueue/cmdqueue.h"
#include "level_zero/core/source/event/event.h"
#include <functional>
#include <vector>
namespace NEO {
@@ -28,7 +31,39 @@ struct BcsSplit {
std::vector<CommandQueue *> cmdQs;
NEO::BcsInfoMask engines = NEO::EngineHelpers::oddLinkedCopyEnginesMask;
void setupDevice(uint32_t productFamily, bool internalUsage, const ze_command_queue_desc_t *desc, NEO::CommandStreamReceiver *csr);
// Splits a single copy of `size` bytes into one chunk per queue in cmdQs.
//
// For each queue, `appendCall` is invoked with the chunk's destination, source and
// length (and a null signal event), after which the command list is executed on that
// queue via executeCommandListImmediateImpl. When hSignalEvent is non-null,
// appendEventForProfilingAllWalkers is called on cmdList with `true` before the
// chunks and with `false` after them.
//
// cmdList      - immediate command list that receives the appended commands.
// dstptr       - start of the destination range.
// srcptr       - start of the source range.
// size         - total number of bytes to copy.
// hSignalEvent - optional event handle; not forwarded to the per-chunk calls.
// appendCall   - callback that appends the copy for one chunk.
//
// Returns ZE_RESULT_SUCCESS when every appendCall succeeded, otherwise the result of
// the first appendCall that failed. All chunks are still appended and executed after
// a failure, so the sequence of calls does not depend on the outcome.
//
// NOTE(review): when cmdQs is empty no chunk is issued and success is returned;
// presumably callers only take this path after setupDevice() returned true - confirm.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t appendSplitCall(CommandListCoreFamilyImmediate<gfxCoreFamily> *cmdList,
                            void *dstptr,
                            const void *srcptr,
                            size_t size,
                            ze_event_handle_t hSignalEvent,
                            std::function<ze_result_t(void *, const void *, size_t, ze_event_handle_t)> appendCall) {
    ze_result_t result = ZE_RESULT_SUCCESS;

    if (hSignalEvent) {
        cmdList->appendEventForProfilingAllWalkers(Event::fromHandle(hSignalEvent), true);
    }

    auto totalSize = size;
    auto engineCount = this->cmdQs.size();
    for (size_t i = 0; i < this->cmdQs.size(); i++) {
        // Divide what is left by the engines that are left: the final chunk takes
        // the whole remainder, so the chunk sizes always add up to `size`.
        auto localSize = totalSize / engineCount;
        // `size - totalSize` is the number of bytes already handed out.
        auto localDstPtr = ptrOffset(dstptr, size - totalSize);
        auto localSrcPtr = ptrOffset(srcptr, size - totalSize);

        auto appendResult = appendCall(localDstPtr, localSrcPtr, localSize, nullptr);
        if (result == ZE_RESULT_SUCCESS) {
            // Keep the first failure so it is not masked by later successful chunks.
            result = appendResult;
        }
        cmdList->executeCommandListImmediateImpl(true, this->cmdQs[i]);

        totalSize -= localSize;
        engineCount--;
    }

    if (hSignalEvent) {
        cmdList->appendEventForProfilingAllWalkers(Event::fromHandle(hSignalEvent), false);
    }

    return result;
}
bool setupDevice(uint32_t productFamily, bool internalUsage, const ze_command_queue_desc_t *desc, NEO::CommandStreamReceiver *csr);
void releaseResources();
// Constructs the split helper, initializing the `device` member from the given DeviceImp.
BcsSplit(DeviceImp &device) : device(device){};