Add engine type to commandList

Change-Id: I108fc027dd4698ebecb224c20b92f2b741698f3c
This commit is contained in:
Kamil Diedrich
2020-09-21 15:25:54 +02:00
committed by sys_ocldev
parent 76a9ccc095
commit 877a781696
22 changed files with 213 additions and 217 deletions

View File

@@ -106,7 +106,7 @@ void CommandList::eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocati
}
// Reports whether this command list is flagged as copy-only.
// NOTE(review): this text comes from a diff rendering without +/- markers; the
// two return statements below appear to be the removed and the added line of the
// same hunk. Read literally, only the first return executes and the second is
// unreachable -- confirm against the post-commit file.
bool CommandList::isCopyOnly() const {
// Apparently the pre-change form: returns the stored boolean flag.
return isCopyOnlyCmdList;
// Apparently the post-change form: true when the stored engine group type
// equals NEO::EngineGroupType::Copy.
return NEO::EngineGroupType::Copy == engineGroupType;
}
NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *kernel) {

View File

@@ -128,11 +128,11 @@ struct CommandList : _ze_command_list_handle_t {
virtual ze_result_t appendMINoop() = 0;
virtual ze_result_t appendPipeControl(void *dstPtr, uint64_t value) = 0;
static CommandList *create(uint32_t productFamily, Device *device, bool isCopyOnly,
static CommandList *create(uint32_t productFamily, Device *device, NEO::EngineGroupType engineGroupType,
ze_result_t &resultValue);
static CommandList *createImmediate(uint32_t productFamily, Device *device,
const ze_command_queue_desc_t *desc,
bool internalUsage, bool isCopyOnly,
bool internalUsage, NEO::EngineGroupType engineGroupType,
ze_result_t &resultValue);
static CommandList *fromHandle(ze_command_list_handle_t handle) {
@@ -181,7 +181,7 @@ struct CommandList : _ze_command_list_handle_t {
std::vector<Kernel *> printfFunctionContainer;
virtual ze_result_t executeCommandListImmediate(bool performMigration) = 0;
virtual ze_result_t initialize(Device *device, bool isCopyOnly) = 0;
virtual ze_result_t initialize(Device *device, NEO::EngineGroupType engineGroupType) = 0;
virtual ~CommandList();
NEO::CommandContainer commandContainer;
@@ -189,7 +189,7 @@ struct CommandList : _ze_command_list_handle_t {
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
uint32_t commandListPerThreadScratchSize = 0u;
NEO::PreemptionMode commandListPreemptionMode = NEO::PreemptionMode::Initial;
bool isCopyOnlyCmdList = false;
NEO::EngineGroupType engineGroupType;
UnifiedMemoryControls unifiedMemoryControls;
bool indirectAllocationsAllowed = false;
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);

View File

@@ -35,8 +35,7 @@ struct CommandListCoreFamily : CommandListImp {
using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS;
using CommandListImp::CommandListImp;
ze_result_t initialize(Device *device, bool isCopyOnly) override;
ze_result_t initialize(Device *device, NEO::EngineGroupType engineGroupType) override;
virtual void programL3(bool isSLMused);
ze_result_t close() override;

View File

@@ -53,16 +53,16 @@ inline ze_result_t parseErrorCode(NEO::ErrorCode returnValue) {
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, bool isCopyOnly) {
ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO::EngineGroupType engineGroupType) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
this->device = device;
this->commandListPreemptionMode = device->getDevicePreemptionMode();
this->isCopyOnlyCmdList = isCopyOnly;
this->engineGroupType = engineGroupType;
auto returnValue = commandContainer.initialize(static_cast<DeviceImp *>(device)->neoDevice);
ze_result_t returnType = parseErrorCode(returnValue);
if (returnType == ZE_RESULT_SUCCESS) {
if (!isCopyOnly) {
if (!isCopyOnly()) {
programStateBaseAddress(commandContainer);
}
}
@@ -215,7 +215,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
if (isCopyOnlyCmdList) {
if (isCopyOnly()) {
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false);
} else {
NEO::PipeControlArgs args;
@@ -292,7 +292,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
auto slicePitch =
image->getImageInfo().imgDesc.imageType == NEO::ImageType::Image1DArray ? 1 : pDstRegion->height * rowPitch;
if (isCopyOnlyCmdList) {
if (isCopyOnly()) {
return appendCopyImageBlit(allocationStruct.alloc, image->getAllocation(),
{0, 0, 0}, {pDstRegion->originX, pDstRegion->originY, pDstRegion->originZ}, rowPitch, slicePitch,
rowPitch, slicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize, hEvent);
@@ -408,7 +408,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
auto slicePitch =
(image->getImageInfo().imgDesc.imageType == NEO::ImageType::Image1DArray ? 1 : pSrcRegion->height) * rowPitch;
if (isCopyOnlyCmdList) {
if (isCopyOnly()) {
return appendCopyImageBlit(image->getAllocation(), allocationStruct.alloc,
{pSrcRegion->originX, pSrcRegion->originY, pSrcRegion->originZ}, {0, 0, 0}, rowPitch, slicePitch,
rowPitch, slicePitch, bytesPerPixel, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, hEvent);
@@ -538,7 +538,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
uint32_t groupSizeY = srcRegion.height;
uint32_t groupSizeZ = srcRegion.depth;
if (isCopyOnlyCmdList) {
if (isCopyOnly()) {
auto bytesPerPixel = static_cast<uint32_t>(srcImage->getImageInfo().surfaceFormat->ImageElementSizeInBytes);
Vec3<uint32_t> srcImgSize = {static_cast<uint32_t>(srcImage->getImageInfo().imgDesc.imageWidth),
@@ -818,48 +818,48 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
appendEventForProfiling(hSignalEvent, true);
if (ret == ZE_RESULT_SUCCESS && leftSize) {
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
dstAllocationStruct.alloc, dstAllocationStruct.offset,
srcAllocationStruct.alignedAllocationPtr,
srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast<uint32_t>(leftSize), hSignalEvent)
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, dstAllocationStruct.offset,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, srcAllocationStruct.offset,
static_cast<uint32_t>(leftSize), 1,
Builtin::CopyBufferToBufferSide);
ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
dstAllocationStruct.alloc, dstAllocationStruct.offset,
srcAllocationStruct.alignedAllocationPtr,
srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast<uint32_t>(leftSize), hSignalEvent)
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, dstAllocationStruct.offset,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, srcAllocationStruct.offset,
static_cast<uint32_t>(leftSize), 1,
Builtin::CopyBufferToBufferSide);
}
if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) {
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
srcAllocationStruct.alignedAllocationPtr,
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, static_cast<uint32_t>(middleSizeBytes), hSignalEvent)
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset,
static_cast<uint32_t>(middleSizeBytes),
static_cast<uint32_t>(middleElSize),
Builtin::CopyBufferToBufferMiddle);
ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
srcAllocationStruct.alignedAllocationPtr,
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, static_cast<uint32_t>(middleSizeBytes), hSignalEvent)
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset,
static_cast<uint32_t>(middleSizeBytes),
static_cast<uint32_t>(middleElSize),
Builtin::CopyBufferToBufferMiddle);
}
if (ret == ZE_RESULT_SUCCESS && rightSize) {
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
srcAllocationStruct.alignedAllocationPtr,
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, static_cast<uint32_t>(rightSize), hSignalEvent)
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset,
static_cast<uint32_t>(rightSize), 1u,
Builtin::CopyBufferToBufferSide);
ret = isCopyOnly() ? appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
srcAllocationStruct.alignedAllocationPtr,
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, static_cast<uint32_t>(rightSize), hSignalEvent)
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset,
static_cast<uint32_t>(rightSize), 1u,
Builtin::CopyBufferToBufferSide);
}
this->appendSignalEventPostWalker(hSignalEvent);
if (dstAllocationStruct.needsFlush && !isCopyOnlyCmdList) {
if (dstAllocationStruct.needsFlush && !isCopyOnly()) {
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
@@ -908,24 +908,24 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
ze_result_t result = ZE_RESULT_SUCCESS;
if (srcRegion->depth > 1) {
result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth},
srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize3, dstSize3, hSignalEvent)
: this->appendMemoryCopyKernel3d(dstAllocationStruct.alloc, srcAllocationStruct.alloc,
Builtin::CopyBufferRectBytes3d, dstRegion, dstPitch, dstSlicePitch, dstAllocationStruct.offset,
srcRegion, srcPitch, srcSlicePitch, srcAllocationStruct.offset, hSignalEvent, 0, nullptr);
result = isCopyOnly() ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth},
srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize3, dstSize3, hSignalEvent)
: this->appendMemoryCopyKernel3d(dstAllocationStruct.alloc, srcAllocationStruct.alloc,
Builtin::CopyBufferRectBytes3d, dstRegion, dstPitch, dstSlicePitch, dstAllocationStruct.offset,
srcRegion, srcPitch, srcSlicePitch, srcAllocationStruct.offset, hSignalEvent, 0, nullptr);
} else {
result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth},
srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize3, dstSize3, hSignalEvent)
: this->appendMemoryCopyKernel2d(dstAllocationStruct.alloc, srcAllocationStruct.alloc,
Builtin::CopyBufferRectBytes2d, dstRegion, dstPitch, dstAllocationStruct.offset,
srcRegion, srcPitch, srcAllocationStruct.offset, hSignalEvent, 0, nullptr);
result = isCopyOnly() ? appendMemoryCopyBlitRegion(srcAllocationStruct.alloc, dstAllocationStruct.alloc, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth},
srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize3, dstSize3, hSignalEvent)
: this->appendMemoryCopyKernel2d(dstAllocationStruct.alloc, srcAllocationStruct.alloc,
Builtin::CopyBufferRectBytes2d, dstRegion, dstPitch, dstAllocationStruct.offset,
srcRegion, srcPitch, srcAllocationStruct.offset, hSignalEvent, 0, nullptr);
}
if (result) {
return result;
}
if (dstAllocationStruct.needsFlush && !isCopyOnlyCmdList) {
if (dstAllocationStruct.needsFlush && !isCopyOnly()) {
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
@@ -1071,7 +1071,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
size_t size,
ze_event_handle_t hEvent) {
if (isCopyOnlyCmdList) {
if (isCopyOnly()) {
return appendBlitFill(ptr, pattern, patternSize, size, hEvent);
}
@@ -1341,7 +1341,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
eventSignalOffset = offsetof(KernelTimestampEvent, contextEnd);
}
if (isCopyOnlyCmdList) {
if (isCopyOnly()) {
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), ptrOffset(baseAddr, eventSignalOffset), Event::STATE_SIGNALED, false, true);
} else {
NEO::PipeControlArgs args;
@@ -1382,7 +1382,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
bool dcFlushEnable = (!event->waitScope) ? false : true;
if (dcFlushEnable) {
if (isCopyOnlyCmdList) {
if (isCopyOnly()) {
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false);
} else {
NEO::PipeControlArgs args(true);

View File

@@ -49,8 +49,7 @@ ze_result_t CommandListImp::appendMetricQueryEnd(zet_metric_query_handle_t hMetr
return MetricQuery::fromHandle(hMetricQuery)->appendEnd(*this, hSignalEvent, numWaitEvents, phWaitEvents);
}
CommandList *CommandList::create(uint32_t productFamily, Device *device, bool isCopyOnly,
ze_result_t &returnValue) {
CommandList *CommandList::create(uint32_t productFamily, Device *device, NEO::EngineGroupType engineGroupType, ze_result_t &returnValue) {
CommandListAllocatorFn allocator = nullptr;
if (productFamily < IGFX_MAX_PRODUCT) {
allocator = commandListFactory[productFamily];
@@ -61,7 +60,7 @@ CommandList *CommandList::create(uint32_t productFamily, Device *device, bool is
if (allocator) {
commandList = static_cast<CommandListImp *>((*allocator)(CommandList::defaultNumIddsPerBlock));
returnValue = commandList->initialize(device, isCopyOnly);
returnValue = commandList->initialize(device, engineGroupType);
if (returnValue != ZE_RESULT_SUCCESS) {
commandList->destroy();
commandList = nullptr;
@@ -72,7 +71,7 @@ CommandList *CommandList::create(uint32_t productFamily, Device *device, bool is
CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device,
const ze_command_queue_desc_t *desc,
bool internalUsage, bool isCopyOnly,
bool internalUsage, NEO::EngineGroupType engineGroupType,
ze_result_t &returnValue) {
CommandListAllocatorFn allocator = nullptr;
@@ -85,7 +84,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
if (allocator) {
commandList = static_cast<CommandListImp *>((*allocator)(CommandList::commandListimmediateIddsPerBlock));
returnValue = commandList->initialize(device, isCopyOnly);
returnValue = commandList->initialize(device, engineGroupType);
if (returnValue != ZE_RESULT_SUCCESS) {
commandList->destroy();
commandList = nullptr;
@@ -102,7 +101,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
UNRECOVERABLE_IF(nullptr == csr);
auto commandQueue = CommandQueue::create(productFamily, device, csr, desc, isCopyOnly);
auto commandQueue = CommandQueue::create(productFamily, device, csr, desc, NEO::EngineGroupType::Copy == engineGroupType);
if (!commandQueue) {
commandList->destroy();
commandList = nullptr;

View File

@@ -85,9 +85,8 @@ ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc,
auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
uint32_t engineGroupIndex = desc->commandQueueGroupOrdinal;
mapOrdinalForAvailableEngineGroup(&engineGroupIndex);
bool useBliter = engineGroupIndex == static_cast<uint32_t>(NEO::EngineGroupType::Copy);
ze_result_t returnValue = ZE_RESULT_SUCCESS;
*commandList = CommandList::create(productFamily, this, useBliter, returnValue);
*commandList = CommandList::create(productFamily, this, static_cast<NEO::EngineGroupType>(engineGroupIndex), returnValue);
return returnValue;
}
@@ -97,9 +96,8 @@ ze_result_t DeviceImp::createCommandListImmediate(const ze_command_queue_desc_t
auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
uint32_t engineGroupIndex = desc->ordinal;
mapOrdinalForAvailableEngineGroup(&engineGroupIndex);
bool useBliter = engineGroupIndex == static_cast<uint32_t>(NEO::EngineGroupType::Copy);
ze_result_t returnValue = ZE_RESULT_SUCCESS;
*phCommandList = CommandList::createImmediate(productFamily, this, desc, false, useBliter, returnValue);
*phCommandList = CommandList::createImmediate(productFamily, this, desc, false, static_cast<NEO::EngineGroupType>(engineGroupIndex), returnValue);
return returnValue;
}
@@ -111,7 +109,6 @@ ze_result_t DeviceImp::createCommandQueue(const ze_command_queue_desc_t *desc,
NEO::CommandStreamReceiver *csr = nullptr;
uint32_t engineGroupIndex = desc->ordinal;
mapOrdinalForAvailableEngineGroup(&engineGroupIndex);
bool useBliter = engineGroupIndex == static_cast<uint32_t>(NEO::EngineGroupType::Copy);
auto ret = getCsrForOrdinalAndIndex(&csr, desc->ordinal, desc->index);
if (ret != ZE_RESULT_SUCCESS) {
return ret;
@@ -119,7 +116,7 @@ ze_result_t DeviceImp::createCommandQueue(const ze_command_queue_desc_t *desc,
UNRECOVERABLE_IF(csr == nullptr);
*commandQueue = CommandQueue::create(productFamily, this, csr, desc, useBliter);
*commandQueue = CommandQueue::create(productFamily, this, csr, desc, NEO::EngineGroupType::Copy == static_cast<NEO::EngineGroupType>(engineGroupIndex));
return ZE_RESULT_SUCCESS;
}
@@ -620,7 +617,7 @@ Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice, uint3
ze_result_t returnValue = ZE_RESULT_SUCCESS;
device->pageFaultCommandList =
CommandList::createImmediate(
device->neoDevice->getHardwareInfo().platform.eProductFamily, device, &cmdQueueDesc, true, false, returnValue);
device->neoDevice->getHardwareInfo().platform.eProductFamily, device, &cmdQueueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue);
}
if (device->getSourceLevelDebugger()) {