Add blit copy implementation for L0

Change-Id: I327a4cf977e166cb648ee9f3a79374f7cefa7b1b
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2020-04-07 16:50:09 +02:00
committed by sys_ocldev
parent 3ce0450a9c
commit 691a4ea823
30 changed files with 961 additions and 171 deletions

View File

@@ -77,9 +77,12 @@ void CommandList::eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocati
} }
} }
bool CommandList::isCopyOnly() const {
return isCopyOnlyCmdList;
}
NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *kernel) { NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *kernel) {
auto functionAttributes = kernel->getImmutableData()->getDescriptor().kernelAttributes; auto functionAttributes = kernel->getImmutableData()->getDescriptor().kernelAttributes;
NEO::PreemptionFlags flags = {}; NEO::PreemptionFlags flags = {};
flags.flags.disabledMidThreadPreemptionKernel = functionAttributes.flags.requiresDisabledMidThreadPreemption; flags.flags.disabledMidThreadPreemptionKernel = functionAttributes.flags.requiresDisabledMidThreadPreemption;
flags.flags.usesFencesForReadWriteImages = functionAttributes.flags.usesFencesForReadWriteImages; flags.flags.usesFencesForReadWriteImages = functionAttributes.flags.usesFencesForReadWriteImages;

View File

@@ -117,10 +117,10 @@ struct CommandList : _ze_command_list_handle_t {
virtual ze_result_t appendMIBBEnd() = 0; virtual ze_result_t appendMIBBEnd() = 0;
virtual ze_result_t appendMINoop() = 0; virtual ze_result_t appendMINoop() = 0;
static CommandList *create(uint32_t productFamily, Device *device); static CommandList *create(uint32_t productFamily, Device *device, bool isCopyOnly);
static CommandList *createImmediate(uint32_t productFamily, Device *device, static CommandList *createImmediate(uint32_t productFamily, Device *device,
const ze_command_queue_desc_t *desc, const ze_command_queue_desc_t *desc,
bool internalUsage); bool internalUsage, bool isCopyOnly);
static CommandList *fromHandle(ze_command_list_handle_t handle) { static CommandList *fromHandle(ze_command_list_handle_t handle) {
return static_cast<CommandList *>(handle); return static_cast<CommandList *>(handle);
@@ -147,6 +147,7 @@ struct CommandList : _ze_command_list_handle_t {
void removeHostPtrAllocations(); void removeHostPtrAllocations();
void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation); void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation);
void eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation); void eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation);
bool isCopyOnly() const;
enum CommandListType : uint32_t { enum CommandListType : uint32_t {
TYPE_REGULAR = 0u, TYPE_REGULAR = 0u,
@@ -161,7 +162,7 @@ struct CommandList : _ze_command_list_handle_t {
std::vector<Kernel *> printfFunctionContainer; std::vector<Kernel *> printfFunctionContainer;
virtual ze_result_t executeCommandListImmediate(bool performMigration) = 0; virtual ze_result_t executeCommandListImmediate(bool performMigration) = 0;
virtual bool initialize(Device *device) = 0; virtual bool initialize(Device *device, bool isCopyOnly) = 0;
virtual ~CommandList(); virtual ~CommandList();
NEO::CommandContainer commandContainer; NEO::CommandContainer commandContainer;
@@ -169,6 +170,7 @@ struct CommandList : _ze_command_list_handle_t {
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap; std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
uint32_t commandListPerThreadScratchSize = 0u; uint32_t commandListPerThreadScratchSize = 0u;
NEO::PreemptionMode commandListPreemptionMode = NEO::PreemptionMode::Initial; NEO::PreemptionMode commandListPreemptionMode = NEO::PreemptionMode::Initial;
bool isCopyOnlyCmdList = false;
}; };
using CommandListAllocatorFn = CommandList *(*)(uint32_t); using CommandListAllocatorFn = CommandList *(*)(uint32_t);

View File

@@ -35,7 +35,7 @@ struct CommandListCoreFamily : CommandListImp {
using CommandListImp::CommandListImp; using CommandListImp::CommandListImp;
bool initialize(Device *device) override; bool initialize(Device *device, bool isCopyOnly) override;
virtual void programL3(bool isSLMused); virtual void programL3(bool isSLMused);
ze_result_t close() override; ze_result_t close() override;
@@ -124,26 +124,39 @@ struct CommandListCoreFamily : CommandListImp {
ze_result_t executeCommandListImmediate(bool performMigration) override; ze_result_t executeCommandListImmediate(bool performMigration) override;
protected: protected:
ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc,
uint64_t dstOffset, void *srcPtr, uint64_t dstOffset, void *srcPtr,
NEO::GraphicsAllocation *srcPtrAlloc, NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset, uint32_t size, uint64_t srcOffset, uint32_t size,
uint32_t elementSize, Builtin builtin); uint32_t elementSize, Builtin builtin);
ze_result_t appendMemoryCopyKernel2d(const void *dstptr, const void *srcptr, MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
Builtin builtin, const ze_copy_region_t *dstRegion, uint64_t dstOffset,
uint32_t dstPitch, size_t dstOffset, NEO::GraphicsAllocation *srcPtrAlloc,
const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint64_t srcOffset, uint32_t size);
size_t srcOffset, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
ze_result_t appendMemoryCopyKernel3d(const void *dstptr, const void *srcptr, MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlitRegion(const void *srcptr,
Builtin builtin, const ze_copy_region_t *dstRegion, const void *dstptr,
uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset, ze_copy_region_t srcRegion,
const ze_copy_region_t *srcRegion, uint32_t srcPitch, ze_copy_region_t dstRegion, Vec3<size_t> copySize,
uint32_t srcSlicePitch, size_t srcOffset, size_t srcRowPitch, size_t srcSlicePitch,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, size_t dstRowPitch, size_t dstSlicePitch,
ze_event_handle_t *phWaitEvents); size_t srcSize, size_t dstSize);
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel2d(const void *dstptr, const void *srcptr,
Builtin builtin, const ze_copy_region_t *dstRegion,
uint32_t dstPitch, size_t dstOffset,
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
size_t srcOffset, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel3d(const void *dstptr, const void *srcptr,
Builtin builtin, const ze_copy_region_t *dstRegion,
uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset,
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
uint32_t srcSlicePitch, size_t srcOffset,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents);
ze_result_t appendLaunchKernelWithParams(ze_kernel_handle_t hKernel, ze_result_t appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
const ze_group_count_t *pThreadGroupDimensions, const ze_group_count_t *pThreadGroupDimensions,
@@ -160,7 +173,7 @@ struct CommandListCoreFamily : CommandListImp {
void appendSignalEventPostWalker(ze_event_handle_t hEvent); void appendSignalEventPostWalker(ze_event_handle_t hEvent);
uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region); uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region);
AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize); virtual AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize);
ze_result_t addEventsToCmdList(ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); ze_result_t addEventsToCmdList(ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
}; };

View File

@@ -10,6 +10,7 @@
#include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/preemption.h" #include "shared/source/command_stream/preemption.h"
#include "shared/source/device/device.h" #include "shared/source/device/device.h"
#include "shared/source/helpers/blit_commands_helper.h"
#include "shared/source/helpers/heap_helper.h" #include "shared/source/helpers/heap_helper.h"
#include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/hw_info.h"
@@ -35,17 +36,19 @@ template <GFXCORE_FAMILY gfxCoreFamily>
struct EncodeStateBaseAddress; struct EncodeStateBaseAddress;
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device) { bool CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, bool isCopyOnly) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily; using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
if (!commandContainer.initialize(static_cast<DeviceImp *>(device)->neoDevice)) { if (!commandContainer.initialize(static_cast<DeviceImp *>(device)->neoDevice)) {
return false; return false;
} }
NEO::EncodeStateBaseAddress<GfxFamily>::encode(commandContainer); if (!isCopyOnly) {
commandContainer.setDirtyStateForAllHeaps(false); NEO::EncodeStateBaseAddress<GfxFamily>::encode(commandContainer);
commandContainer.setDirtyStateForAllHeaps(false);
}
this->device = device; this->device = device;
this->commandListPreemptionMode = device->getDevicePreemptionMode(); this->commandListPreemptionMode = device->getDevicePreemptionMode();
this->isCopyOnlyCmdList = isCopyOnly;
return true; return true;
} }
@@ -182,7 +185,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
return ZE_RESULT_ERROR_INVALID_ARGUMENT; return ZE_RESULT_ERROR_INVALID_ARGUMENT;
} }
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), false); if (isCopyOnlyCmdList) {
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false);
} else {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), false);
}
if (hSignalEvent) { if (hSignalEvent) {
this->appendSignalEventPostWalker(hSignalEvent); this->appendSignalEventPostWalker(hSignalEvent);
@@ -565,6 +572,48 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(v
nullptr, 0, nullptr); nullptr, 0, nullptr);
} }
// Appends a one-dimensional buffer-to-buffer copy to this command list through
// the blit commands helper.
//
// dstPtrAlloc / srcPtrAlloc - allocations written to / read from; both are
//                             added to the residency container.
// dstOffset / srcOffset     - passed as the first component of the destination /
//                             source offset vector (remaining components are 0).
// size                      - passed as the first component of the copy size
//                             (presumably a byte count - confirm against callers).
//
// All row and slice pitches are passed as 0. Always returns ZE_RESULT_SUCCESS.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
                                                                       uint64_t dstOffset,
                                                                       NEO::GraphicsAllocation *srcPtrAlloc,
                                                                       uint64_t srcOffset,
                                                                       uint32_t size) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;

    // Describe the copy first; only the first dimension carries data.
    auto copyDescription = NEO::BlitProperties::constructPropertiesForCopyBuffer(
        dstPtrAlloc, srcPtrAlloc,
        {dstOffset, 0, 0}, {srcOffset, 0, 0}, {size, 0, 0},
        0, 0, 0, 0);

    // Register both allocations as resident, destination first.
    commandContainer.addToResidencyContainer(dstPtrAlloc);
    commandContainer.addToResidencyContainer(srcPtrAlloc);

    // Resolve the dispatch targets only after residency has been updated,
    // matching the original evaluation order.
    auto *blitStream = commandContainer.getCommandStream();
    auto *executionEnvironment = device->getNEODevice()->getExecutionEnvironment();
    auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()];

    NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(copyDescription, *blitStream, rootDeviceEnvironment);

    return ZE_RESULT_SUCCESS;
}
// Appends a region (multi-dimensional) buffer copy to this command list
// through the blit commands helper.
//
// srcptr / dstptr         - pointers resolved to allocations via
//                           getAlignedAllocation(); srcSize / dstSize are used
//                           only for that lookup.
// srcRegion / dstRegion   - only originX/originY/originZ are read here; they
//                           become the source / destination offset vectors.
// copySize                - extent of the copy, forwarded unchanged.
// *RowPitch / *SlicePitch - forwarded unchanged to the blit properties.
//
// Both resolved allocations are added to the residency container.
// Always returns ZE_RESULT_SUCCESS.
//
// NOTE(review): the linear copy path in this file passes the aligned
// allocation's `offset` to the blit, while this path uses only the region
// origins - confirm that is intended for pointers that do not start at the
// beginning of their allocation.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(const void *srcptr,
                                                                             const void *dstptr,
                                                                             ze_copy_region_t srcRegion,
                                                                             ze_copy_region_t dstRegion, Vec3<size_t> copySize,
                                                                             size_t srcRowPitch, size_t srcSlicePitch,
                                                                             size_t dstRowPitch, size_t dstSlicePitch,
                                                                             size_t srcSize, size_t dstSize) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;

    // Destination is looked up before source, as in the original sequence.
    auto dstAllocData = getAlignedAllocation(this->device, dstptr, dstSize);
    auto srcAllocData = getAlignedAllocation(this->device, srcptr, srcSize);

    const Vec3<size_t> srcOrigin{srcRegion.originX, srcRegion.originY, srcRegion.originZ};
    const Vec3<size_t> dstOrigin{dstRegion.originX, dstRegion.originY, dstRegion.originZ};

    auto copyDescription = NEO::BlitProperties::constructPropertiesForCopyBuffer(
        dstAllocData.alloc, srcAllocData.alloc,
        dstOrigin, srcOrigin, copySize,
        srcRowPitch, srcSlicePitch,
        dstRowPitch, dstSlicePitch);

    // Register both allocations as resident, destination first.
    commandContainer.addToResidencyContainer(dstAllocData.alloc);
    commandContainer.addToResidencyContainer(srcAllocData.alloc);

    // Resolve the dispatch targets only after residency has been updated,
    // matching the original evaluation order.
    auto *blitStream = commandContainer.getCommandStream();
    auto *executionEnvironment = device->getNEODevice()->getExecutionEnvironment();
    auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()];

    NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(copyDescription, *blitStream, rootDeviceEnvironment);

    return ZE_RESULT_SUCCESS;
}
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(NEO::GraphicsAllocation *dstptr,
NEO::GraphicsAllocation *srcptr, NEO::GraphicsAllocation *srcptr,
@@ -644,31 +693,37 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
appendEventForProfiling(hSignalEvent, true); appendEventForProfiling(hSignalEvent, true);
if (ret == ZE_RESULT_SUCCESS && leftSize) { if (ret == ZE_RESULT_SUCCESS && leftSize) {
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr), ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, dstAllocationStruct.offset,
dstAllocationStruct.alloc, dstAllocationStruct.offset, srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast<uint32_t>(leftSize))
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr), : appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, srcAllocationStruct.offset, dstAllocationStruct.alloc, dstAllocationStruct.offset,
static_cast<uint32_t>(leftSize), 1, reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
Builtin::CopyBufferToBufferSide); srcAllocationStruct.alloc, srcAllocationStruct.offset,
static_cast<uint32_t>(leftSize), 1,
Builtin::CopyBufferToBufferSide);
} }
if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) { if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) {
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr), ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset, srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, static_cast<uint32_t>(middleSizeBytes))
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr), : appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
static_cast<uint32_t>(middleSizeBytes), reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
static_cast<uint32_t>(middleElSize), srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset,
Builtin::CopyBufferToBufferMiddle); static_cast<uint32_t>(middleSizeBytes),
static_cast<uint32_t>(middleElSize),
Builtin::CopyBufferToBufferMiddle);
} }
if (ret == ZE_RESULT_SUCCESS && rightSize) { if (ret == ZE_RESULT_SUCCESS && rightSize) {
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr), ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset, srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, static_cast<uint32_t>(rightSize))
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr), : appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
static_cast<uint32_t>(rightSize), 1u, reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
Builtin::CopyBufferToBufferSide); srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset,
static_cast<uint32_t>(rightSize), 1u,
Builtin::CopyBufferToBufferSide);
} }
if (hSignalEvent) { if (hSignalEvent) {
@@ -743,13 +798,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
ze_result_t result = ZE_RESULT_SUCCESS; ze_result_t result = ZE_RESULT_SUCCESS;
if (srcRegion->depth > 1) { if (srcRegion->depth > 1) {
result = this->appendMemoryCopyKernel3d(alignedDstPtr, alignedSrcPtr, result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(alignedSrcPtr, alignedDstPtr, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth}, srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize)
Builtin::CopyBufferRectBytes3d, dstRegion, dstPitch, dstSlicePitch, dstOffset, : this->appendMemoryCopyKernel3d(alignedDstPtr, alignedSrcPtr,
srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, 0, nullptr); Builtin::CopyBufferRectBytes3d, dstRegion, dstPitch, dstSlicePitch, dstOffset,
srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, 0, nullptr);
} else { } else {
result = this->appendMemoryCopyKernel2d(alignedDstPtr, alignedSrcPtr, result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(alignedSrcPtr, alignedDstPtr, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth}, srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize)
Builtin::CopyBufferRectBytes2d, dstRegion, dstPitch, dstOffset, : this->appendMemoryCopyKernel2d(alignedDstPtr, alignedSrcPtr,
srcRegion, srcPitch, srcOffset, hSignalEvent, 0, nullptr); Builtin::CopyBufferRectBytes2d, dstRegion, dstPitch, dstOffset,
srcRegion, srcPitch, srcOffset, hSignalEvent, 0, nullptr);
} }
if (result) { if (result) {
@@ -757,7 +814,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
} }
if (hostPointerNeedsFlush) { if (hostPointerNeedsFlush) {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), true); if (isCopyOnlyCmdList) {
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false);
} else {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), true);
}
} }
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
@@ -1091,12 +1152,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
auto event = Event::fromHandle(hEvent); auto event = Event::fromHandle(hEvent);
commandContainer.addToResidencyContainer(&event->getAllocation()); commandContainer.addToResidencyContainer(&event->getAllocation());
if (isCopyOnlyCmdList) {
bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), event->getGpuAddress(), Event::STATE_SIGNALED, false, true);
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation( } else {
*commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
event->getGpuAddress(), Event::STATE_SIGNALED, dcFlushEnable, commandContainer.getDevice()->getHardwareInfo()); NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
*commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
event->getGpuAddress(), Event::STATE_SIGNALED, dcFlushEnable, commandContainer.getDevice()->getHardwareInfo());
}
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
@@ -1127,7 +1190,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
bool dcFlushEnable = (event->waitScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; bool dcFlushEnable = (event->waitScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
if (dcFlushEnable) { if (dcFlushEnable) {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), true); if (isCopyOnlyCmdList) {
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false);
} else {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), true);
}
} }
} }

View File

@@ -101,19 +101,22 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_hand
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_START); timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_START);
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress); NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress);
} else { } else {
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END); timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END);
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress); NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress);
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_END); timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_END);
bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation( if (isCopyOnlyCmdList) {
*(commandContainer.getCommandStream()), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP, NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), timeStampAddress, 0llu, true, true);
timeStampAddress, } else {
0llu, NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
dcFlushEnable, *(commandContainer.getCommandStream()), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP,
device->getHwInfo()); timeStampAddress,
0llu,
dcFlushEnable,
device->getHwInfo());
}
} }
} }
} // namespace L0 } // namespace L0

View File

@@ -10,6 +10,7 @@
#include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/linear_stream.h"
#include "shared/source/device/device.h" #include "shared/source/device/device.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/indirect_heap/indirect_heap.h"
#include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device.h"
@@ -48,7 +49,7 @@ ze_result_t CommandListImp::appendMetricQueryEnd(zet_metric_query_handle_t hMetr
return MetricQuery::fromHandle(hMetricQuery)->appendEnd(*this, hCompletionEvent); return MetricQuery::fromHandle(hMetricQuery)->appendEnd(*this, hCompletionEvent);
} }
CommandList *CommandList::create(uint32_t productFamily, Device *device) { CommandList *CommandList::create(uint32_t productFamily, Device *device, bool isCopyOnly) {
CommandListAllocatorFn allocator = nullptr; CommandListAllocatorFn allocator = nullptr;
if (productFamily < IGFX_MAX_PRODUCT) { if (productFamily < IGFX_MAX_PRODUCT) {
allocator = commandListFactory[productFamily]; allocator = commandListFactory[productFamily];
@@ -58,24 +59,27 @@ CommandList *CommandList::create(uint32_t productFamily, Device *device) {
if (allocator) { if (allocator) {
commandList = static_cast<CommandListImp *>((*allocator)(CommandList::defaultNumIddsPerBlock)); commandList = static_cast<CommandListImp *>((*allocator)(CommandList::defaultNumIddsPerBlock));
commandList->initialize(device); commandList->initialize(device, isCopyOnly);
} }
return commandList; return commandList;
} }
CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device, CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device,
const ze_command_queue_desc_t *desc, const ze_command_queue_desc_t *desc,
bool internalUsage) { bool internalUsage, bool isCopyOnly) {
auto deviceImp = static_cast<DeviceImp *>(device); auto deviceImp = static_cast<DeviceImp *>(device);
NEO::CommandStreamReceiver *csr = nullptr; NEO::CommandStreamReceiver *csr = nullptr;
if (internalUsage) { if (internalUsage) {
csr = deviceImp->neoDevice->getInternalEngine().commandStreamReceiver; csr = deviceImp->neoDevice->getInternalEngine().commandStreamReceiver;
} else if (isCopyOnly) {
auto &selectorCopyEngine = deviceImp->neoDevice->getDeviceById(0)->getSelectorCopyEngine();
csr = deviceImp->neoDevice->getDeviceById(0)->getEngine(NEO::EngineHelpers::getBcsEngineType(deviceImp->neoDevice->getHardwareInfo(), selectorCopyEngine), false).commandStreamReceiver;
} else { } else {
csr = deviceImp->neoDevice->getDefaultEngine().commandStreamReceiver; csr = deviceImp->neoDevice->getDefaultEngine().commandStreamReceiver;
} }
auto commandQueue = CommandQueue::create(productFamily, device, csr, desc); auto commandQueue = CommandQueue::create(productFamily, device, csr, desc, isCopyOnly);
if (!commandQueue) { if (!commandQueue) {
return nullptr; return nullptr;
} }
@@ -89,7 +93,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
if (allocator) { if (allocator) {
commandList = static_cast<CommandListImp *>((*allocator)(CommandList::commandListimmediateIddsPerBlock)); commandList = static_cast<CommandListImp *>((*allocator)(CommandList::commandListimmediateIddsPerBlock));
commandList->initialize(device); commandList->initialize(device, isCopyOnly);
} }
if (!commandList) { if (!commandList) {

View File

@@ -28,13 +28,14 @@ ze_result_t CommandQueueImp::destroy() {
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
void CommandQueueImp::initialize() { void CommandQueueImp::initialize(bool copyOnly) {
buffers.initialize(device, totalCmdBufferSize); buffers.initialize(device, totalCmdBufferSize);
NEO::GraphicsAllocation *bufferAllocation = buffers.getCurrentBufferAllocation(); NEO::GraphicsAllocation *bufferAllocation = buffers.getCurrentBufferAllocation();
commandStream = new NEO::LinearStream(bufferAllocation->getUnderlyingBuffer(), commandStream = new NEO::LinearStream(bufferAllocation->getUnderlyingBuffer(),
defaultQueueCmdBufferSize); defaultQueueCmdBufferSize);
UNRECOVERABLE_IF(commandStream == nullptr); UNRECOVERABLE_IF(commandStream == nullptr);
commandStream->replaceGraphicsAllocation(bufferAllocation); commandStream->replaceGraphicsAllocation(bufferAllocation);
isCopyOnlyCommandQueue = copyOnly;
} }
void CommandQueueImp::reserveLinearStreamSize(size_t size) { void CommandQueueImp::reserveLinearStreamSize(size_t size) {
@@ -91,7 +92,7 @@ void CommandQueueImp::printFunctionsPrintfOutput() {
} }
CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr, CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr,
const ze_command_queue_desc_t *desc) { const ze_command_queue_desc_t *desc, bool isCopyOnly) {
CommandQueueAllocatorFn allocator = nullptr; CommandQueueAllocatorFn allocator = nullptr;
if (productFamily < IGFX_MAX_PRODUCT) { if (productFamily < IGFX_MAX_PRODUCT) {
allocator = commandQueueFactory[productFamily]; allocator = commandQueueFactory[productFamily];
@@ -101,7 +102,7 @@ CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::
if (allocator) { if (allocator) {
commandQueue = static_cast<CommandQueueImp *>((*allocator)(device, csr, desc)); commandQueue = static_cast<CommandQueueImp *>((*allocator)(device, csr, desc));
commandQueue->initialize(); commandQueue->initialize(isCopyOnly);
} }
return commandQueue; return commandQueue;
} }

View File

@@ -42,7 +42,7 @@ struct CommandQueue : _ze_command_queue_handle_t {
virtual ze_result_t synchronize(uint32_t timeout) = 0; virtual ze_result_t synchronize(uint32_t timeout) = 0;
static CommandQueue *create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr, static CommandQueue *create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr,
const ze_command_queue_desc_t *desc); const ze_command_queue_desc_t *desc, bool isCopyOnly);
static CommandQueue *fromHandle(ze_command_queue_handle_t handle) { static CommandQueue *fromHandle(ze_command_queue_handle_t handle) {
return static_cast<CommandQueue *>(handle); return static_cast<CommandQueue *>(handle);
@@ -58,6 +58,7 @@ struct CommandQueue : _ze_command_queue_handle_t {
std::atomic<uint32_t> commandQueuePerThreadScratchSize; std::atomic<uint32_t> commandQueuePerThreadScratchSize;
NEO::PreemptionMode commandQueuePreemptionMode = NEO::PreemptionMode::Initial; NEO::PreemptionMode commandQueuePreemptionMode = NEO::PreemptionMode::Initial;
bool commandQueueDebugCmdsProgrammed = false; bool commandQueueDebugCmdsProgrammed = false;
bool isCopyOnlyCommandQueue = false;
}; };
using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr, using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr,

View File

@@ -62,6 +62,13 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
for (auto i = 0u; i < numCommandLists; i++) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
if (isCopyOnlyCommandQueue != commandList->isCopyOnly()) {
return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE;
}
}
size_t spaceForResidency = 0; size_t spaceForResidency = 0;
size_t preemptionSize = 0u; size_t preemptionSize = 0u;
size_t debuggerCmdsSize = 0; size_t debuggerCmdsSize = 0;
@@ -122,7 +129,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
if (hFence) { if (hFence) {
fence = Fence::fromHandle(hFence); fence = Fence::fromHandle(hFence);
spaceForResidency += residencyContainerSpaceForFence; spaceForResidency += residencyContainerSpaceForFence;
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo()); linearStreamSizeEstimate += isCopyOnlyCommandQueue ? NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite() : NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo());
} }
spaceForResidency += residencyContainerSpaceForTagWrite; spaceForResidency += residencyContainerSpaceForTagWrite;
@@ -138,70 +145,72 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
gsbaStateDirty |= !gsbaInit; gsbaStateDirty |= !gsbaInit;
frontEndStateDirty |= !frontEndInit; frontEndStateDirty |= !frontEndInit;
if (!isCopyOnlyCommandQueue) {
if (!gpgpuEnabled) { if (!gpgpuEnabled) {
linearStreamSizeEstimate += estimatePipelineSelect(); linearStreamSizeEstimate += estimatePipelineSelect();
}
if (frontEndStateDirty) {
linearStreamSizeEstimate += estimateFrontEndCmdSize();
}
if (gsbaStateDirty) {
linearStreamSizeEstimate += estimateStateBaseAddressCmdSize();
}
linearStreamSizeEstimate += preemptionSize + debuggerCmdsSize;
} }
if (frontEndStateDirty) { linearStreamSizeEstimate += isCopyOnlyCommandQueue ? NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite() : NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo());
linearStreamSizeEstimate += estimateFrontEndCmdSize();
}
if (gsbaStateDirty) {
linearStreamSizeEstimate += estimateStateBaseAddressCmdSize();
}
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo());
linearStreamSizeEstimate += preemptionSize + debuggerCmdsSize;
size_t alignedSize = alignUp<size_t>(linearStreamSizeEstimate, minCmdBufferPtrAlign); size_t alignedSize = alignUp<size_t>(linearStreamSizeEstimate, minCmdBufferPtrAlign);
size_t padding = alignedSize - linearStreamSizeEstimate; size_t padding = alignedSize - linearStreamSizeEstimate;
reserveLinearStreamSize(alignedSize); reserveLinearStreamSize(alignedSize);
NEO::LinearStream child(commandStream->getSpace(alignedSize), alignedSize); NEO::LinearStream child(commandStream->getSpace(alignedSize), alignedSize);
if (!gpgpuEnabled) { if (!isCopyOnlyCommandQueue) {
programPipelineSelect(child); if (!gpgpuEnabled) {
} programPipelineSelect(child);
}
if (!commandQueueDebugCmdsProgrammed && neoDevice->isDebuggerActive()) { if (!commandQueueDebugCmdsProgrammed && neoDevice->isDebuggerActive()) {
NEO::PreambleHelper<GfxFamily>::programKernelDebugging(&child); NEO::PreambleHelper<GfxFamily>::programKernelDebugging(&child);
commandQueueDebugCmdsProgrammed = true; commandQueueDebugCmdsProgrammed = true;
} }
if (frontEndStateDirty) { if (frontEndStateDirty) {
programFrontEnd(scratchSpaceController->getScratchPatchAddress(), child); programFrontEnd(scratchSpaceController->getScratchPatchAddress(), child);
} }
if (gsbaStateDirty) { if (gsbaStateDirty) {
programGeneralStateBaseAddress(scratchSpaceController->calculateNewGSH(), child); programGeneralStateBaseAddress(scratchSpaceController->calculateNewGSH(), child);
} }
if (commandQueuePreemptionMode == NEO::PreemptionMode::Initial) { if (commandQueuePreemptionMode == NEO::PreemptionMode::Initial) {
NEO::PreemptionHelper::programCsrBaseAddress<GfxFamily>(child, *neoDevice, csr->getPreemptionAllocation()); NEO::PreemptionHelper::programCsrBaseAddress<GfxFamily>(child, *neoDevice, csr->getPreemptionAllocation());
NEO::PreemptionHelper::programStateSip<GfxFamily>(child, *neoDevice); NEO::PreemptionHelper::programStateSip<GfxFamily>(child, *neoDevice);
NEO::PreemptionHelper::programCmdStream<GfxFamily>(child, NEO::PreemptionHelper::programCmdStream<GfxFamily>(child,
devicePreemption, devicePreemption,
commandQueuePreemptionMode, commandQueuePreemptionMode,
csr->getPreemptionAllocation()); csr->getPreemptionAllocation());
commandQueuePreemptionMode = devicePreemption; commandQueuePreemptionMode = devicePreemption;
statePreemption = commandQueuePreemptionMode; statePreemption = commandQueuePreemptionMode;
} }
const bool sipKernelUsed = devicePreemption == NEO::PreemptionMode::MidThread || const bool sipKernelUsed = devicePreemption == NEO::PreemptionMode::MidThread ||
neoDevice->isDebuggerActive(); neoDevice->isDebuggerActive();
if (devicePreemption == NEO::PreemptionMode::MidThread) { if (devicePreemption == NEO::PreemptionMode::MidThread) {
residencyContainer.push_back(csr->getPreemptionAllocation()); residencyContainer.push_back(csr->getPreemptionAllocation());
} }
if (sipKernelUsed) { if (sipKernelUsed) {
auto sipIsa = NEO::SipKernel::getSipKernelAllocation(*neoDevice); auto sipIsa = NEO::SipKernel::getSipKernelAllocation(*neoDevice);
residencyContainer.push_back(sipIsa); residencyContainer.push_back(sipIsa);
} }
if (neoDevice->isDebuggerActive()) { if (neoDevice->isDebuggerActive()) {
residencyContainer.push_back(device->getDebugSurface()); residencyContainer.push_back(device->getDebugSurface());
}
} }
for (auto i = 0u; i < numCommandLists; ++i) { for (auto i = 0u; i < numCommandLists; ++i) {
auto commandList = CommandList::fromHandle(phCommandLists[i]); auto commandList = CommandList::fromHandle(phCommandLists[i]);
auto cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations(); auto cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations();
@@ -254,12 +263,17 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
if (hFence) { if (hFence) {
residencyContainer.push_back(&fence->getAllocation()); residencyContainer.push_back(&fence->getAllocation());
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation( if (isCopyOnlyCommandQueue) {
child, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(child, fence->getGpuAddress(), Fence::STATE_SIGNALED, false, true);
fence->getGpuAddress(), Fence::STATE_SIGNALED, true, device->getHwInfo()); } else {
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
child, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
fence->getGpuAddress(), Fence::STATE_SIGNALED, true, device->getHwInfo());
}
} }
dispatchTaskCountWrite(child, true); dispatchTaskCountWrite(child, true);
residencyContainer.push_back(csr->getTagAllocation()); residencyContainer.push_back(csr->getTagAllocation());
void *endingCmd = nullptr; void *endingCmd = nullptr;
if (directSubmissionEnabled) { if (directSubmissionEnabled) {
@@ -334,8 +348,12 @@ void CommandQueueHw<gfxCoreFamily>::dispatchTaskCountWrite(NEO::LinearStream &co
auto taskCountToWrite = csr->peekTaskCount() + 1; auto taskCountToWrite = csr->peekTaskCount() + 1;
auto gpuAddress = static_cast<uint64_t>(csr->getTagAllocation()->getGpuAddress()); auto gpuAddress = static_cast<uint64_t>(csr->getTagAllocation()->getGpuAddress());
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation( if (isCopyOnlyCommandQueue) {
commandStream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, gpuAddress, taskCountToWrite, false, true);
gpuAddress, taskCountToWrite, true, device->getHwInfo()); } else {
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
commandStream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
gpuAddress, taskCountToWrite, true, device->getHwInfo());
}
} }
} // namespace L0 } // namespace L0

View File

@@ -68,7 +68,7 @@ struct CommandQueueImp : public CommandQueue {
ze_result_t synchronize(uint32_t timeout) override; ze_result_t synchronize(uint32_t timeout) override;
void initialize(); void initialize(bool copyOnly);
Device *getDevice() { return device; } Device *getDevice() { return device; }

View File

@@ -15,6 +15,7 @@
#include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/constants.h" #include "shared/source/helpers/constants.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/string.h" #include "shared/source/helpers/string.h"
#include "shared/source/kernel/grf_config.h" #include "shared/source/kernel/grf_config.h"
@@ -77,7 +78,12 @@ ze_result_t DeviceImp::canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *
ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc, ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc,
ze_command_list_handle_t *commandList) { ze_command_list_handle_t *commandList) {
auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
*commandList = CommandList::create(productFamily, this); bool useBliter = false;
auto ret = isCreatedCommandListCopyOnly(desc, &useBliter, ZE_COMMAND_LIST_FLAG_COPY_ONLY);
if (ret != ZE_RESULT_SUCCESS) {
return ret;
}
*commandList = CommandList::create(productFamily, this, useBliter);
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
@@ -85,7 +91,14 @@ ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc,
ze_result_t DeviceImp::createCommandListImmediate(const ze_command_queue_desc_t *desc, ze_result_t DeviceImp::createCommandListImmediate(const ze_command_queue_desc_t *desc,
ze_command_list_handle_t *phCommandList) { ze_command_list_handle_t *phCommandList) {
auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
*phCommandList = CommandList::createImmediate(productFamily, this, desc, false);
bool useBliter = false;
auto ret = isCreatedCommandListCopyOnly(desc, &useBliter, ZE_COMMAND_QUEUE_FLAG_COPY_ONLY);
if (ret != ZE_RESULT_SUCCESS) {
return ret;
}
*phCommandList = CommandList::createImmediate(productFamily, this, desc, false, useBliter);
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
@@ -94,9 +107,19 @@ ze_result_t DeviceImp::createCommandQueue(const ze_command_queue_desc_t *desc,
ze_command_queue_handle_t *commandQueue) { ze_command_queue_handle_t *commandQueue) {
auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
auto csr = neoDevice->getDefaultEngine().commandStreamReceiver; NEO::CommandStreamReceiver *csr = nullptr;
bool useBliter = false;
*commandQueue = CommandQueue::create(productFamily, this, csr, desc); auto ret = isCreatedCommandListCopyOnly(desc, &useBliter, ZE_COMMAND_QUEUE_FLAG_COPY_ONLY);
if (ret != ZE_RESULT_SUCCESS) {
return ret;
}
if (useBliter) {
auto &selectorCopyEngine = this->neoDevice->getDeviceById(0)->getSelectorCopyEngine();
csr = this->neoDevice->getDeviceById(0)->getEngine(NEO::EngineHelpers::getBcsEngineType(neoDevice->getHardwareInfo(), selectorCopyEngine), false).commandStreamReceiver;
} else {
csr = neoDevice->getDefaultEngine().commandStreamReceiver;
}
*commandQueue = CommandQueue::create(productFamily, this, csr, desc, useBliter);
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
@@ -515,7 +538,7 @@ ze_result_t DeviceImp::registerCLCommandQueue(cl_context context, cl_command_que
auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
auto csr = neoDevice->getDefaultEngine().commandStreamReceiver; auto csr = neoDevice->getDefaultEngine().commandStreamReceiver;
*phCommandQueue = CommandQueue::create(productFamily, this, csr, &desc); *phCommandQueue = CommandQueue::create(productFamily, this, csr, &desc, false);
return ZE_RESULT_SUCCESS; return ZE_RESULT_SUCCESS;
} }
@@ -574,7 +597,7 @@ Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice) {
cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
device->pageFaultCommandList = device->pageFaultCommandList =
CommandList::createImmediate( CommandList::createImmediate(
device->neoDevice->getHardwareInfo().platform.eProductFamily, device, &cmdQueueDesc, true); device->neoDevice->getHardwareInfo().platform.eProductFamily, device, &cmdQueueDesc, true, false);
} }
if (neoDevice->getDeviceInfo().debuggerActive) { if (neoDevice->getDeviceInfo().debuggerActive) {
@@ -696,4 +719,17 @@ NEO::GraphicsAllocation *DeviceImp::allocateMemoryFromHostPtr(const void *buffer
return allocation; return allocation;
} }
/**
 * Checks whether the descriptor requests a copy-only command list / queue and,
 * if so, whether this device can honour the request.
 *
 * @param desc       Creation descriptor; only its `flags` member is read.
 * @param useBliter  Out-parameter. Set to true only when the copy-only flag is
 *                   present AND the hardware reports blitter support. It is
 *                   left untouched in every other case, so callers must
 *                   pre-initialize it (all visible callers pass `false`).
 * @param flag       The copy-only flag bit matching the descriptor type.
 * @return ZE_RESULT_SUCCESS when the flag is absent or can be satisfied;
 *         ZE_RESULT_ERROR_INVALID_ENUMERATION when copy-only is requested but
 *         blitter operations are not supported.
 */
template <typename DescriptionType, typename ExpectedFlagType>
ze_result_t DeviceImp::isCreatedCommandListCopyOnly(const DescriptionType *desc, bool *useBliter, ExpectedFlagType flag) {
    // Copy-only was not requested: nothing to validate.
    if (!(desc->flags & flag)) {
        return ZE_RESULT_SUCCESS;
    }

    // Bind by const reference instead of copying the whole hardware-info
    // object just to read a single capability bit. A const reference is
    // valid whether the getter returns a reference or a temporary.
    const auto &hwInfo = neoDevice->getHardwareInfo();
    if (!hwInfo.capabilityTable.blitterOperationsSupported) {
        return ZE_RESULT_ERROR_INVALID_ENUMERATION;
    }

    *useBliter = true;
    return ZE_RESULT_SUCCESS;
}
} // namespace L0 } // namespace L0

View File

@@ -93,6 +93,8 @@ struct DeviceImp : public Device {
CommandList *pageFaultCommandList = nullptr; CommandList *pageFaultCommandList = nullptr;
protected: protected:
template <typename DescriptionType, typename ExpectedFlagType>
ze_result_t isCreatedCommandListCopyOnly(const DescriptionType *desc, bool *useBliter, ExpectedFlagType flag);
NEO::GraphicsAllocation *debugSurface = nullptr; NEO::GraphicsAllocation *debugSurface = nullptr;
}; };

View File

@@ -50,7 +50,7 @@ struct Event : _ze_event_handle_t {
inline ze_event_handle_t toHandle() { return this; } inline ze_event_handle_t toHandle() { return this; }
NEO::GraphicsAllocation &getAllocation(); virtual NEO::GraphicsAllocation &getAllocation();
uint64_t getGpuAddress() { return gpuAddress; } uint64_t getGpuAddress() { return gpuAddress; }
uint64_t getOffsetOfEventTimestampRegister(uint32_t eventTimestampReg); uint64_t getOffsetOfEventTimestampRegister(uint32_t eventTimestampReg);
@@ -97,7 +97,7 @@ struct EventPool : _ze_event_pool_handle_t {
inline ze_event_pool_handle_t toHandle() { return this; } inline ze_event_pool_handle_t toHandle() { return this; }
NEO::GraphicsAllocation &getAllocation() { return *eventPoolAllocation; } virtual NEO::GraphicsAllocation &getAllocation() { return *eventPoolAllocation; }
virtual uint32_t getEventSize() = 0; virtual uint32_t getEventSize() = 0;
virtual uint32_t getNumEventTimestampsToRead() = 0; virtual uint32_t getNumEventTimestampsToRead() = 0;

View File

@@ -22,7 +22,7 @@ using IsSKLOrKBL = IsWithinProducts<IGFX_SKYLAKE, IGFX_KABYLAKE>;
HWTEST2_F(CommandListAppendLaunchKernel, givenKernelWithSLMThenL3IsProgrammedWithSLMValue, IsSKLOrKBL) { HWTEST2_F(CommandListAppendLaunchKernel, givenKernelWithSLMThenL3IsProgrammedWithSLMValue, IsSKLOrKBL) {
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
createKernel(); createKernel();
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device)); std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
ze_group_count_t groupCount{1, 1, 1}; ze_group_count_t groupCount{1, 1, 1};
EXPECT_LE(0u, kernel->kernelImmData->getDescriptor().kernelAttributes.slmInlineSize); EXPECT_LE(0u, kernel->kernelImmData->getDescriptor().kernelAttributes.slmInlineSize);

View File

@@ -20,6 +20,8 @@ set(L0_MOCKS_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/mock_driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_driver.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_driver_handle.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_driver_handle.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_driver_handle.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_driver_handle.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_event.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mock_event.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.h
${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.cpp

View File

@@ -32,6 +32,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass = ::L0::CommandListCoreFamily<gfxCoreFamily>; using BaseClass = ::L0::CommandListCoreFamily<gfxCoreFamily>;
using BaseClass::appendLaunchKernelWithParams; using BaseClass::appendLaunchKernelWithParams;
using BaseClass::commandListPreemptionMode; using BaseClass::commandListPreemptionMode;
using BaseClass::getAlignedAllocation;
WhiteBox() : ::L0::CommandListCoreFamily<gfxCoreFamily>(BaseClass::defaultNumIddsPerBlock) {} WhiteBox() : ::L0::CommandListCoreFamily<gfxCoreFamily>(BaseClass::defaultNumIddsPerBlock) {}
virtual ~WhiteBox() {} virtual ~WhiteBox() {}
@@ -149,7 +150,7 @@ struct Mock<CommandList> : public CommandList {
MOCK_METHOD0(appendMIBBEnd, ze_result_t()); MOCK_METHOD0(appendMIBBEnd, ze_result_t());
MOCK_METHOD0(appendMINoop, ze_result_t()); MOCK_METHOD0(appendMINoop, ze_result_t());
MOCK_METHOD1(executeCommandListImmediate, ze_result_t(bool perforMigration)); MOCK_METHOD1(executeCommandListImmediate, ze_result_t(bool perforMigration));
MOCK_METHOD1(initialize, bool(L0::Device *device)); MOCK_METHOD2(initialize, bool(L0::Device *device, bool onlyCopyBlit));
uint8_t *batchBuffer = nullptr; uint8_t *batchBuffer = nullptr;
NEO::GraphicsAllocation *mockAllocation = nullptr; NEO::GraphicsAllocation *mockAllocation = nullptr;

View File

@@ -0,0 +1,31 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "mock_event.h"
#include <vector>
namespace L0 {
namespace ult {
// Wraps the in-object `memory` word in a host-memory graphics allocation and
// publishes that allocation through the inherited `allocation` pointer.
Mock<Event>::Mock()
    : mockAllocation(0,
                     NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                     &memory,
                     reinterpret_cast<uint64_t>(&memory),
                     0,
                     sizeof(memory),
                     MemoryPool::System4KBPages) {
    allocation = &mockAllocation;
}
// Nothing to release explicitly: all members clean up after themselves.
Mock<Event>::~Mock() = default;
// Creates a mock pool holding a single slot and makes getPoolSize() report 1.
// `pool(1)` value-initializes its one element to 0, so no further assignment
// or re-allocation of the vector is needed.
Mock<EventPool>::Mock() : pool(1) {
    EXPECT_CALL(*this, getPoolSize()).WillRepeatedly(testing::Return(1));
}
// The `pool` vector releases its storage in its own destructor, so an
// explicit clear() here is redundant.
Mock<EventPool>::~Mock() = default;
} // namespace ult
} // namespace L0

View File

@@ -0,0 +1,86 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/test/unit_tests/mock.h"
#include "level_zero/core/test/unit_tests/white_box.h"
#include <vector>
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Winconsistent-missing-override"
#endif
namespace L0 {
namespace ult {
// Test-only specialization of WhiteBox for L0::Event. The using-declaration
// re-declares the inherited `allocation` member inside this struct's scope so
// that tests and mocks built on it can reach the member directly.
template <>
struct WhiteBox<::L0::Event> : public ::L0::Event {
using BaseClass = ::L0::Event;
using BaseClass::allocation;
};
using Event = WhiteBox<::L0::Event>;
// Test-only specialization of WhiteBox for L0::EventPool. It adds only the
// BaseClass alias; no members are re-declared here.
template <>
struct WhiteBox<::L0::EventPool> : public ::L0::EventPool {
using BaseClass = ::L0::EventPool;
};
using EventPool = WhiteBox<::L0::EventPool>;
// gmock-based mock of an event. Besides the mocked entry points it owns a
// small piece of memory and a graphics allocation describing it; the
// constructor (defined out of line) wires these together.
template <>
struct Mock<Event> : public Event {
Mock();
~Mock() override;
// Mocked event operations; expectations are set by the individual tests.
MOCK_METHOD3(create, L0::Event *(::L0::EventPool *eventPool, const ze_event_desc_t *desc, ::L0::Device *device));
MOCK_METHOD0(destroy, ze_result_t());
MOCK_METHOD0(hostSignal, ze_result_t());
MOCK_METHOD1(hostSynchronize, ze_result_t(uint32_t timeout));
MOCK_METHOD0(queryStatus, ze_result_t());
MOCK_METHOD0(reset, ze_result_t());
MOCK_METHOD2(getTimestamp, ze_result_t(ze_event_timestamp_type_t timestampType, void *dstptr));
// Fake an allocation for event memory
// `memory` is declared before `mockAllocation` on purpose: the constructor's
// initializer for `mockAllocation` takes the address and size of `memory`.
// The -1 initializer converts to the all-ones uint32_t value.
alignas(16) uint32_t memory = -1;
NEO::GraphicsAllocation mockAllocation;
// Re-declare the inherited member in this struct's scope.
using Event::allocation;
};
// gmock-based mock of an event pool. Every listed operation is mocked; the
// constructor (defined out of line) installs the default getPoolSize()
// expectation.
template <>
struct Mock<EventPool> : public EventPool {
Mock();
~Mock() override;
// Mocked pool operations; expectations are set by the individual tests.
MOCK_METHOD0(destroy, ze_result_t());
MOCK_METHOD0(getPoolSize, size_t());
MOCK_METHOD0(getPoolUsedCount, uint32_t());
MOCK_METHOD1(getIpcHandle, ze_result_t(ze_ipc_event_pool_handle_t *pIpcHandle));
MOCK_METHOD0(closeIpcHandle, ze_result_t());
MOCK_METHOD2(createEvent, ze_result_t(const ze_event_desc_t *desc, ze_event_handle_t *phEvent));
MOCK_METHOD2(reserveEventFromPool, ze_result_t(int index, ::L0::Event *event));
MOCK_METHOD1(releaseEventToPool, ze_result_t(::L0::Event *event));
MOCK_METHOD0(getDevice, Device *());
MOCK_METHOD0(getEventSize, uint32_t());
MOCK_METHOD0(getNumEventTimestampsToRead, uint32_t());
// Backing storage sized by the constructor.
std::vector<int> pool;
// Re-declare the inherited member in this struct's scope.
using EventPool::eventPoolAllocation;
};
} // namespace ult
} // namespace L0
#if defined(__clang__)
#pragma clang diagnostic pop
#endif

View File

@@ -6,11 +6,16 @@
*/ */
#include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h" #include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h"
#include "opencl/test/unit_test/mocks/mock_graphics_allocation.h"
#include "test.h" #include "test.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_event.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
namespace L0 { namespace L0 {
@@ -32,7 +37,7 @@ TEST(zeCommandListCreateImmediate, redirectsToObject) {
} }
TEST_F(CommandListCreate, whenCommandListIsCreatedThenItIsInitialized) { TEST_F(CommandListCreate, whenCommandListIsCreatedThenItIsInitialized) {
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device)); std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
ASSERT_NE(nullptr, commandList); ASSERT_NE(nullptr, commandList);
EXPECT_EQ(device, commandList->device); EXPECT_EQ(device, commandList->device);
@@ -57,7 +62,7 @@ TEST_F(CommandListCreate, whenCommandListIsCreatedThenItIsInitialized) {
} }
TEST_F(CommandListCreate, givenRegularCommandListThenDefaultNumIddPerBlockIsUsed) { TEST_F(CommandListCreate, givenRegularCommandListThenDefaultNumIddPerBlockIsUsed) {
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device)); std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
ASSERT_NE(nullptr, commandList); ASSERT_NE(nullptr, commandList);
const uint32_t defaultNumIdds = CommandList::defaultNumIddsPerBlock; const uint32_t defaultNumIdds = CommandList::defaultNumIddsPerBlock;
@@ -71,7 +76,7 @@ TEST_F(CommandListCreate, givenImmediateCommandListThenCustomNumIddPerBlockUsed)
ZE_COMMAND_QUEUE_MODE_DEFAULT, ZE_COMMAND_QUEUE_MODE_DEFAULT,
ZE_COMMAND_QUEUE_PRIORITY_NORMAL, ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
0}; 0};
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false)); std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, false));
ASSERT_NE(nullptr, commandList); ASSERT_NE(nullptr, commandList);
const uint32_t cmdListImmediateIdds = CommandList::commandListimmediateIddsPerBlock; const uint32_t cmdListImmediateIdds = CommandList::commandListimmediateIddsPerBlock;
@@ -85,7 +90,7 @@ TEST_F(CommandListCreate, whenCreatingImmediateCommandListThenItHasImmediateComm
ZE_COMMAND_QUEUE_MODE_DEFAULT, ZE_COMMAND_QUEUE_MODE_DEFAULT,
ZE_COMMAND_QUEUE_PRIORITY_NORMAL, ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
0}; 0};
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false)); std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, false));
ASSERT_NE(nullptr, commandList); ASSERT_NE(nullptr, commandList);
EXPECT_EQ(device, commandList->device); EXPECT_EQ(device, commandList->device);
@@ -94,14 +99,14 @@ TEST_F(CommandListCreate, whenCreatingImmediateCommandListThenItHasImmediateComm
} }
TEST_F(CommandListCreate, givenInvalidProductFamilyThenReturnsNullPointer) { TEST_F(CommandListCreate, givenInvalidProductFamilyThenReturnsNullPointer) {
std::unique_ptr<L0::CommandList> commandList(CommandList::create(IGFX_UNKNOWN, device)); std::unique_ptr<L0::CommandList> commandList(CommandList::create(IGFX_UNKNOWN, device, false));
EXPECT_EQ(nullptr, commandList); EXPECT_EQ(nullptr, commandList);
} }
HWTEST_F(CommandListCreate, whenCommandListIsCreatedThenStateBaseAddressCmdIsAddedAndCorrectlyProgrammed) { HWTEST_F(CommandListCreate, whenCommandListIsCreatedThenStateBaseAddressCmdIsAddedAndCorrectlyProgrammed) {
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device)); std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
auto &commandContainer = commandList->commandContainer; auto &commandContainer = commandList->commandContainer;
auto gmmHelper = commandContainer.getDevice()->getGmmHelper(); auto gmmHelper = commandContainer.getDevice()->getGmmHelper();
@@ -141,5 +146,265 @@ HWTEST_F(CommandListCreate, whenCommandListIsCreatedThenStateBaseAddressCmdIsAdd
EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER), cmdSba->getStatelessDataPortAccessMemoryObjectControlState()); EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER), cmdSba->getStatelessDataPortAccessMemoryObjectControlState());
} }
// A command list created with the copy-only argument set must not contain a
// STATE_BASE_ADDRESS command in its freshly initialized command stream.
HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenCreatedThenStateBaseAddressCmdIsNotProgrammed) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, true));
    auto &container = commandList->commandContainer;

    // Decode everything that has been written to the stream so far.
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands,
        ptrOffset(container.getCommandStream()->getCpuBase(), 0),
        container.getCommandStream()->getUsed()));

    auto sbaPosition = find<STATE_BASE_ADDRESS *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_EQ(parsedCommands.end(), sbaPosition);
}
// Appending a barrier to a copy-only command list must emit an MI_FLUSH_DW
// command into the command stream.
HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenSetBarrierThenMiFlushDWIsProgrammed) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, true));
    auto &container = commandList->commandContainer;

    commandList->appendBarrier(nullptr, 0, nullptr);

    // Decode the stream and look for the flush command.
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands,
        ptrOffset(container.getCommandStream()->getCpuBase(), 0),
        container.getCommandStream()->getUsed()));

    auto flushPosition = find<MI_FLUSH_DW *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_NE(parsedCommands.end(), flushPosition);
}
// Appending a barrier to a regular (non copy-only) command list must emit a
// PIPE_CONTROL command into the command stream.
HWTEST_F(CommandListCreate, givenCommandListWhenSetBarrierThenPipeControlIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
    auto &container = commandList->commandContainer;

    commandList->appendBarrier(nullptr, 0, nullptr);

    // Decode the stream and look for the pipe control.
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands,
        ptrOffset(container.getCommandStream()->getCpuBase(), 0),
        container.getCommandStream()->getUsed()));

    auto pipeControlPosition = find<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_NE(parsedCommands.end(), pipeControlPosition);
}
// Test double for a core-family command list. Each overridden copy primitive
// only bumps its own call counter and reports success, which lets tests check
// which primitive the public copy entry points dispatch to.
template <GFXCORE_FAMILY gfxCoreFamily>
class MockCommandList : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
  public:
    MockCommandList() : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>(1) {}

    // Returns a fixed placeholder result instead of resolving the buffer.
    AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override {
        return {0, 0, nullptr, true};
    }

    // Kernel-based linear copy: counted, never executed.
    ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr,
                                             NEO::GraphicsAllocation *dstPtrAlloc,
                                             uint64_t dstOffset,
                                             void *srcPtr,
                                             NEO::GraphicsAllocation *srcPtrAlloc,
                                             uint64_t srcOffset,
                                             uint32_t size,
                                             uint32_t elementSize,
                                             Builtin builtin) override {
        ++appendMemoryCopyKernelWithGACalledTimes;
        return ZE_RESULT_SUCCESS;
    }

    // Blitter-based linear copy: counted, never executed.
    ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
                                     uint64_t dstOffset,
                                     NEO::GraphicsAllocation *srcPtrAlloc,
                                     uint64_t srcOffset, uint32_t size) override {
        ++appendMemoryCopyBlitCalledTimes;
        return ZE_RESULT_SUCCESS;
    }

    // Blitter-based region copy: counted, never executed.
    ze_result_t appendMemoryCopyBlitRegion(const void *srcptr,
                                           const void *dstptr,
                                           ze_copy_region_t srcRegion,
                                           ze_copy_region_t dstRegion, Vec3<size_t> copySize,
                                           size_t srcRowPitch, size_t srcSlicePitch,
                                           size_t dstRowPitch, size_t dstSlicePitch,
                                           size_t srcSize, size_t dstSize) override {
        ++appendMemoryCopyBlitRegionCalledTimes;
        return ZE_RESULT_SUCCESS;
    }

    // Kernel-based 2D region copy: counted, never executed.
    ze_result_t appendMemoryCopyKernel2d(const void *dstptr, const void *srcptr,
                                         Builtin builtin, const ze_copy_region_t *dstRegion,
                                         uint32_t dstPitch, size_t dstOffset,
                                         const ze_copy_region_t *srcRegion, uint32_t srcPitch,
                                         size_t srcOffset, ze_event_handle_t hSignalEvent,
                                         uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
        ++appendMemoryCopyKernel2dCalledTimes;
        return ZE_RESULT_SUCCESS;
    }

    // Kernel-based 3D region copy: counted, never executed.
    ze_result_t appendMemoryCopyKernel3d(const void *dstptr, const void *srcptr,
                                         Builtin builtin, const ze_copy_region_t *dstRegion,
                                         uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset,
                                         const ze_copy_region_t *srcRegion, uint32_t srcPitch,
                                         uint32_t srcSlicePitch, size_t srcOffset,
                                         ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
                                         ze_event_handle_t *phWaitEvents) override {
        ++appendMemoryCopyKernel3dalledTimes;
        return ZE_RESULT_SUCCESS;
    }

    // Per-primitive call counters inspected by the tests.
    uint32_t appendMemoryCopyKernelWithGACalledTimes = 0;
    uint32_t appendMemoryCopyBlitCalledTimes = 0;
    uint32_t appendMemoryCopyBlitRegionCalledTimes = 0;
    uint32_t appendMemoryCopyKernel2dCalledTimes = 0;
    uint32_t appendMemoryCopyKernel3dalledTimes = 0;
};
using Platforms = IsAtLeastProduct<IGFX_SKYLAKE>;
// With a regular (non copy-only) list, appendMemoryCopy must go through the
// kernel-based copy path and never touch the blitter path.
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyCalledThenAppendMemoryCopyWithappendMemoryCopyKernelWithGACalled, Platforms) {
    MockCommandList<gfxCoreFamily> mockList;
    mockList.initialize(device, false);

    // Arbitrary fake addresses; the mock never dereferences them.
    auto *source = reinterpret_cast<void *>(0x1234);
    auto *destination = reinterpret_cast<void *>(0x2345);
    mockList.appendMemoryCopy(destination, source, 0x1001, nullptr, 0, nullptr);

    EXPECT_GT(mockList.appendMemoryCopyKernelWithGACalledTimes, 0u);
    EXPECT_EQ(mockList.appendMemoryCopyBlitCalledTimes, 0u);
}
// With a copy-only list, appendMemoryCopy must go through the blitter path
// and never touch the kernel-based copy path.
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyCalledThenAppendMemoryCopyWithappendMemoryCopyWithBliterCalled, Platforms) {
    MockCommandList<gfxCoreFamily> mockList;
    mockList.initialize(device, true);

    // Arbitrary fake addresses; the mock never dereferences them.
    auto *source = reinterpret_cast<void *>(0x1234);
    auto *destination = reinterpret_cast<void *>(0x2345);
    mockList.appendMemoryCopy(destination, source, 0x1001, nullptr, 0, nullptr);

    EXPECT_EQ(mockList.appendMemoryCopyKernelWithGACalledTimes, 0u);
    EXPECT_GT(mockList.appendMemoryCopyBlitCalledTimes, 0u);
}
// Driver-handle stub whose allocation lookup always succeeds. When the caller
// supplies an output pointer, it is pointed at the stub's own default-
// initialized `data` record.
class MockDriverHandle : public L0::DriverHandleImp {
  public:
    bool findAllocationDataForRange(const void *buffer,
                                    size_t size,
                                    NEO::SvmAllocationData **allocData) override {
        if (allocData != nullptr) {
            *allocData = &data;
        }
        return true;
    }

    NEO::SvmAllocationData data = {};
};
// With a copy-only list, appendMemoryCopyRegion must dispatch to the blitter
// region-copy primitive.
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionCalledThenAppendMemoryCopyWithappendMemoryCopyWithBliterCalled, Platforms) {
    MockCommandList<gfxCoreFamily> mockList;
    mockList.initialize(device, true);

    // NOTE(review): `device` keeps a pointer to this stack-local handle after
    // the test body returns - confirm fixture teardown does not dereference it.
    MockDriverHandle stubDriverHandle;
    device->setDriverHandle(&stubDriverHandle);

    // Arbitrary fake addresses; the mock never dereferences them.
    auto *source = reinterpret_cast<void *>(0x1234);
    auto *destination = reinterpret_cast<void *>(0x2345);
    ze_copy_region_t destinationRegion = {};
    ze_copy_region_t sourceRegion = {};
    mockList.appendMemoryCopyRegion(destination, &destinationRegion, 0, 0, source, &sourceRegion, 0, 0, nullptr);

    EXPECT_GT(mockList.appendMemoryCopyBlitRegionCalledTimes, 0u);
}
// With a regular list and the six-value regions used below, the 3D copy
// kernel path must be used and the blitter region path must stay untouched.
HWTEST2_F(CommandListCreate, givenCommandListAnd3DWhbufferenMemoryCopyRegionCalledThenCopyKernel3DCalled, Platforms) {
    MockCommandList<gfxCoreFamily> mockList;
    mockList.initialize(device, false);

    // NOTE(review): `device` keeps a pointer to this stack-local handle after
    // the test body returns - confirm fixture teardown does not dereference it.
    MockDriverHandle stubDriverHandle;
    device->setDriverHandle(&stubDriverHandle);

    // Arbitrary fake addresses; the mock never dereferences them.
    auto *source = reinterpret_cast<void *>(0x1234);
    auto *destination = reinterpret_cast<void *>(0x2345);
    ze_copy_region_t destinationRegion = {4, 4, 4, 2, 2, 2};
    ze_copy_region_t sourceRegion = {4, 4, 4, 2, 2, 2};
    mockList.appendMemoryCopyRegion(destination, &destinationRegion, 0, 0, source, &sourceRegion, 0, 0, nullptr);

    EXPECT_EQ(mockList.appendMemoryCopyBlitRegionCalledTimes, 0u);
    EXPECT_GT(mockList.appendMemoryCopyKernel3dalledTimes, 0u);
}
// Checks that a region copy on a command list initialized with `false` bumps
// the 2D copy-kernel counter and leaves the blit-region counter at zero.
HWTEST2_F(CommandListCreate, givenCommandListAnd2DWhbufferenMemoryCopyRegionCalledThenCopyKernel2DCalled, Platforms) {
    MockCommandList<gfxCoreFamily> mockList;
    mockList.initialize(device, false);

    // Mock driver handle so that the allocation-range lookup always succeeds.
    MockDriverHandle mockDriver;
    device->setDriverHandle(&mockDriver);

    void *source = reinterpret_cast<void *>(0x1234);
    void *destination = reinterpret_cast<void *>(0x2345);

    // Identical source and destination regions; the final value (presumably
    // the depth - TODO confirm field order) is 1 here.
    ze_copy_region_t destinationRegion = {4, 4, 0, 2, 2, 1};
    ze_copy_region_t sourceRegion = {4, 4, 0, 2, 2, 1};

    mockList.appendMemoryCopyRegion(destination, &destinationRegion, 0, 0,
                                    source, &sourceRegion, 0, 0, nullptr);

    EXPECT_EQ(mockList.appendMemoryCopyBlitRegionCalledTimes, 0u);
    EXPECT_GT(mockList.appendMemoryCopyKernel2dCalledTimes, 0u);
}
// Event test double that owns a mock graphics allocation and exposes it
// through getAllocation().
class MockEvent : public Mock<Event> {
  public:
    // Builds the backing mock allocation and caches its GPU address in the
    // inherited `gpuAddress` field.
    MockEvent()
        : mockAllocation(new NEO::MockGraphicsAllocation(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                                         reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
                                                         MemoryPool::System4KBPages)) {
        gpuAddress = mockAllocation->getGpuAddress();
    }

    // Returns the allocation owned by this mock.
    NEO::GraphicsAllocation &getAllocation() override {
        return *mockAllocation;
    }

    std::unique_ptr<NEO::GraphicsAllocation> mockAllocation;
};
// Signalling an event on a copy-only command list must leave an MI_FLUSH_DW
// in the command stream.
HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendSignalEventThenMiFlushDWIsProgrammed) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    // Third argument of create() is isCopyOnly.
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, true));

    MockEvent event;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_NONE;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_NONE;
    commandList->appendSignalEvent(event.toHandle());

    // Parse everything written to the command stream so far.
    auto stream = commandList->commandContainer.getCommandStream();
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(stream->getCpuBase(), 0), stream->getUsed()));

    auto flushIt = find<MI_FLUSH_DW *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_NE(parsedCommands.end(), flushIt);
}
// Signalling an event on a regular (non copy-only) command list must leave a
// PIPE_CONTROL in the command stream.
HWTEST_F(CommandListCreate, givenCommandListyWhenAppendSignalEventThePipeControlIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    // Third argument of create() is isCopyOnly.
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));

    MockEvent event;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_NONE;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_NONE;
    commandList->appendSignalEvent(event.toHandle());

    // Parse everything written to the command stream so far.
    auto stream = commandList->commandContainer.getCommandStream();
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(stream->getCpuBase(), 0), stream->getUsed()));

    auto pipeControlIt = find<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_NE(parsedCommands.end(), pipeControlIt);
}
// Waiting on a host-scoped event from a copy-only command list must leave an
// MI_FLUSH_DW in the command stream.
HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendWaitEventsWithDcFlushThenMiFlushDWIsProgrammed) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    // Third argument of create() is isCopyOnly.
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, true));

    MockEvent event;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_NONE;
    auto waitHandle = event.toHandle();
    commandList->appendWaitOnEvents(1, &waitHandle);

    // Parse everything written to the command stream so far.
    auto stream = commandList->commandContainer.getCommandStream();
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(stream->getCpuBase(), 0), stream->getUsed()));

    auto flushIt = find<MI_FLUSH_DW *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_NE(parsedCommands.end(), flushIt);
}
// Waiting on a host-scoped event from a regular (non copy-only) command list
// must leave a PIPE_CONTROL in the command stream.
HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThePipeControlIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    // Third argument of create() is isCopyOnly.
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));

    MockEvent event;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_NONE;
    auto waitHandle = event.toHandle();
    commandList->appendWaitOnEvents(1, &waitHandle);

    // Parse everything written to the command stream so far.
    auto stream = commandList->commandContainer.getCommandStream();
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(stream->getCpuBase(), 0), stream->getUsed()));

    auto pipeControlIt = find<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_NE(parsedCommands.end(), pipeControlIt);
}
} // namespace ult } // namespace ult
} // namespace L0 } // namespace L0

View File

@@ -23,7 +23,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenAp
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
createKernel(); createKernel();
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device)); std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
auto &commandContainer = commandList->commandContainer; auto &commandContainer = commandList->commandContainer;
const auto stream = commandContainer.getCommandStream(); const auto stream = commandContainer.getCommandStream();
@@ -59,7 +59,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenFunctionWhenBind
createKernel(); createKernel();
ze_group_count_t groupCount{1, 1, 1}; ze_group_count_t groupCount{1, 1, 1};
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device)); std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
auto commandStream = commandList->commandContainer.getCommandStream(); auto commandStream = commandList->commandContainer.getCommandStream();
@@ -86,7 +86,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenFunctionWhenBind
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToCommandListThenKernelIsStored) { HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToCommandListThenKernelIsStored) {
createKernel(); createKernel();
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device)); std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
ze_group_count_t groupCount{1, 1, 1}; ze_group_count_t groupCount{1, 1, 1};
EXPECT_TRUE(kernel->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf); EXPECT_TRUE(kernel->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf);
@@ -100,7 +100,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToCommandListMultipleTimesThenKernelIsStoredOnce) { HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToCommandListMultipleTimesThenKernelIsStoredOnce) {
createKernel(); createKernel();
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device)); std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
ze_group_count_t groupCount{1, 1, 1}; ze_group_count_t groupCount{1, 1, 1};
EXPECT_TRUE(kernel->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf); EXPECT_TRUE(kernel->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf);
@@ -118,7 +118,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC
HWTEST_F(CommandListAppendLaunchKernel, WhenAppendingMultipleTimesThenSshIsNotDepletedButReallocated) { HWTEST_F(CommandListAppendLaunchKernel, WhenAppendingMultipleTimesThenSshIsNotDepletedButReallocated) {
createKernel(); createKernel();
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device)); std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
ze_group_count_t groupCount{1, 1, 1}; ze_group_count_t groupCount{1, 1, 1};
auto kernelSshSize = kernel->getSurfaceStateHeapDataSize(); auto kernelSshSize = kernel->getSurfaceStateHeapDataSize();
@@ -143,7 +143,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, WhenAppendingFunctionThenUsedCmdBufferS
ze_group_count_t groupCount{1, 1, 1}; ze_group_count_t groupCount{1, 1, 1};
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>(); auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
bool ret = commandList->initialize(device); bool ret = commandList->initialize(device, false);
ASSERT_TRUE(ret); ASSERT_TRUE(ret);
auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed(); auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed();

View File

@@ -9,6 +9,7 @@
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "shared/test/unit_test/helpers/default_hw_info.h" #include "shared/test/unit_test/helpers/default_hw_info.h"
#include "opencl/test/unit_test/mocks/mock_graphics_allocation.h"
#include "test.h" #include "test.h"
#include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/source/driver/driver_handle_imp.h"
@@ -34,7 +35,8 @@ TEST_F(CommandQueueCreate, whenCreatingCommandQueueThenItIsInitialized) {
L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, L0::CommandQueue *commandQueue = CommandQueue::create(productFamily,
device, device,
csr.get(), csr.get(),
&desc); &desc,
false);
ASSERT_NE(nullptr, commandQueue); ASSERT_NE(nullptr, commandQueue);
L0::CommandQueueImp *commandQueueImp = reinterpret_cast<L0::CommandQueueImp *>(commandQueue); L0::CommandQueueImp *commandQueueImp = reinterpret_cast<L0::CommandQueueImp *>(commandQueue);
@@ -90,7 +92,7 @@ HWTEST2_F(CommandQueueProgramSBATest, whenCreatingCommandQueueThenItIsInitialize
desc.version = ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT; desc.version = ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT;
auto csr = std::unique_ptr<NEO::CommandStreamReceiver>(neoDevice->createCommandStreamReceiver()); auto csr = std::unique_ptr<NEO::CommandStreamReceiver>(neoDevice->createCommandStreamReceiver());
auto commandQueue = new MockCommandQueueHw<gfxCoreFamily>(device, csr.get(), &desc); auto commandQueue = new MockCommandQueueHw<gfxCoreFamily>(device, csr.get(), &desc);
commandQueue->initialize(); commandQueue->initialize(false);
uint32_t alignedSize = 4096u; uint32_t alignedSize = 4096u;
NEO::LinearStream child(commandQueue->commandStream->getSpace(alignedSize), alignedSize); NEO::LinearStream child(commandQueue->commandStream->getSpace(alignedSize), alignedSize);
@@ -103,5 +105,56 @@ HWTEST2_F(CommandQueueProgramSBATest, whenCreatingCommandQueueThenItIsInitialize
commandQueue->destroy(); commandQueue->destroy();
} }
// A queue created with the COPY_ONLY flag must reject a command list that was
// created with isCopyOnly == false.
TEST_F(CommandQueueCreate, givenCmdQueueWithBlitCopyWhenExecutingNonCopyBlitCommandListThenWrongCommandListStatusReturned) {
    const ze_command_queue_desc_t desc = {
        ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT,
        ZE_COMMAND_QUEUE_FLAG_COPY_ONLY,
        ZE_COMMAND_QUEUE_MODE_DEFAULT,
        ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
        0};

    // The test owns this command stream receiver for the lifetime of the queue.
    auto csr = std::unique_ptr<NEO::CommandStreamReceiver>(neoDevice->createCommandStreamReceiver());

    // Trailing `true` is presumably the copy-only flag - TODO confirm.
    L0::CommandQueue *queue = CommandQueue::create(productFamily, device, csr.get(), &desc, true);
    ASSERT_NE(nullptr, queue);

    // Third argument of CommandList::create() is isCopyOnly.
    std::unique_ptr<L0::CommandList> nonCopyList(CommandList::create(productFamily, device, false));
    auto listHandle = nonCopyList->toHandle();

    auto result = queue->executeCommandLists(1, &listHandle, nullptr, false);
    EXPECT_EQ(result, ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE);

    queue->destroy();
}
// A queue created with the COPY_ONLY flag must accept a command list that was
// created with isCopyOnly == true.
TEST_F(CommandQueueCreate, givenCmdQueueWithBlitCopyWhenExecutingCopyBlitCommandListThenSuccessReturned) {
    const ze_command_queue_desc_t desc = {
        ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT,
        ZE_COMMAND_QUEUE_FLAG_COPY_ONLY,
        ZE_COMMAND_QUEUE_MODE_DEFAULT,
        ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
        0};

    // Uses the command stream receiver of the device's default engine.
    auto engineCsr = neoDevice->getDefaultEngine().commandStreamReceiver;

    // Trailing `true` is presumably the copy-only flag - TODO confirm.
    L0::CommandQueue *queue = CommandQueue::create(productFamily, device, engineCsr, &desc, true);
    ASSERT_NE(nullptr, queue);

    // Third argument of CommandList::create() is isCopyOnly.
    std::unique_ptr<L0::CommandList> copyList(CommandList::create(productFamily, device, true));
    auto listHandle = copyList->toHandle();

    auto result = queue->executeCommandLists(1, &listHandle, nullptr, false);
    EXPECT_EQ(result, ZE_RESULT_SUCCESS);

    queue->destroy();
}
} // namespace ult } // namespace ult
} // namespace L0 } // namespace L0

View File

@@ -72,13 +72,13 @@ using CommandQueueDebugCommandsTest = Test<ActiveDebuggerFixture>;
HWTEST_F(CommandQueueDebugCommandsTest, givenDebuggingEnabledWhenCommandListIsExecutedThenKernelDebugCommandsAreAdded) { HWTEST_F(CommandQueueDebugCommandsTest, givenDebuggingEnabledWhenCommandListIsExecutedThenKernelDebugCommandsAreAdded) {
ze_command_queue_desc_t queueDesc = {}; ze_command_queue_desc_t queueDesc = {};
auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, deviceL0, device->getDefaultEngine().commandStreamReceiver, &queueDesc)); auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, deviceL0, device->getDefaultEngine().commandStreamReceiver, &queueDesc, false));
ASSERT_NE(nullptr, commandQueue->commandStream); ASSERT_NE(nullptr, commandQueue->commandStream);
auto usedSpaceBefore = commandQueue->commandStream->getUsed(); auto usedSpaceBefore = commandQueue->commandStream->getUsed();
ze_command_list_handle_t commandLists[] = { ze_command_list_handle_t commandLists[] = {
CommandList::create(productFamily, deviceL0)->toHandle()}; CommandList::create(productFamily, deviceL0, false)->toHandle()};
uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]);
auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);

View File

@@ -224,7 +224,7 @@ struct EncodeBatchBufferStartOrEnd {
template <typename GfxFamily> template <typename GfxFamily>
struct EncodeMiFlushDW { struct EncodeMiFlushDW {
using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW; using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW;
static void programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData); static void programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData, bool timeStampOperation, bool commandWithPostSync);
static void programMiFlushDwWA(LinearStream &commandStream); static void programMiFlushDwWA(LinearStream &commandStream);
static void appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd); static void appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd);
static size_t getMiFlushDwCmdSizeForDataWrite(); static size_t getMiFlushDwCmdSizeForDataWrite();

View File

@@ -395,14 +395,17 @@ void EncodeSurfaceState<Family>::getSshAlignedPointer(uintptr_t &ptr, size_t &of
} }
template <typename GfxFamily> template <typename GfxFamily>
void EncodeMiFlushDW<GfxFamily>::programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData) { void EncodeMiFlushDW<GfxFamily>::programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData, bool timeStampOperation, bool commandWithPostSync) {
programMiFlushDwWA(commandStream); programMiFlushDwWA(commandStream);
auto miFlushDwCmd = commandStream.getSpaceForCmd<MI_FLUSH_DW>(); auto miFlushDwCmd = commandStream.getSpaceForCmd<MI_FLUSH_DW>();
*miFlushDwCmd = GfxFamily::cmdInitMiFlushDw; *miFlushDwCmd = GfxFamily::cmdInitMiFlushDw;
miFlushDwCmd->setPostSyncOperation(MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD); if (commandWithPostSync) {
miFlushDwCmd->setDestinationAddress(immediateDataGpuAddress); auto postSyncType = timeStampOperation ? MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_TIMESTAMP_REGISTER : MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD;
miFlushDwCmd->setImmediateData(immediateData); miFlushDwCmd->setPostSyncOperation(postSyncType);
miFlushDwCmd->setDestinationAddress(immediateDataGpuAddress);
miFlushDwCmd->setImmediateData(immediateData);
}
appendMiFlushDw(miFlushDwCmd); appendMiFlushDw(miFlushDwCmd);
} }

View File

@@ -849,7 +849,7 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
if (blitProperties.outputTimestampPacket) { if (blitProperties.outputTimestampPacket) {
auto timestampPacketGpuAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); auto timestampPacketGpuAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, timestampPacketGpuAddress, 0); EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, timestampPacketGpuAddress, 0, true, true);
makeResident(*blitProperties.outputTimestampPacket->getBaseGraphicsAllocation()); makeResident(*blitProperties.outputTimestampPacket->getBaseGraphicsAllocation());
} }
@@ -861,7 +861,7 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo()); MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo());
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), newTaskCount); EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), newTaskCount, false, true);
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo()); MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo());

View File

@@ -35,7 +35,7 @@ HWTEST_F(CommandEncoderTests, givenImmDataWriteWhenProgrammingMiFlushDwThenSetAl
uint64_t gpuAddress = 0x1230000; uint64_t gpuAddress = 0x1230000;
uint64_t immData = 456; uint64_t immData = 456;
EncodeMiFlushDW<FamilyType>::programMiFlushDw(linearStream, gpuAddress, immData); EncodeMiFlushDW<FamilyType>::programMiFlushDw(linearStream, gpuAddress, immData, false, true);
auto miFlushDwCmd = reinterpret_cast<MI_FLUSH_DW *>(buffer); auto miFlushDwCmd = reinterpret_cast<MI_FLUSH_DW *>(buffer);
unsigned int sizeMultiplier = 1; unsigned int sizeMultiplier = 1;

View File

@@ -11,6 +11,7 @@ set(NEO_CORE_ENCODERS_TESTS
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_dispatch_kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_encode_dispatch_kernel.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_math.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_encode_math.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_media_interface_descriptor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_encode_media_interface_descriptor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_mi_flush_dw.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_semaphore.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_encode_semaphore.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_set_mmio.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_encode_set_mmio.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_states.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_encode_states.cpp

View File

@@ -0,0 +1,124 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/helpers/blit_commands_helper.h"
#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h"
#include "opencl/test/unit_test/mocks/mock_graphics_allocation.h"
#include "test.h"
using namespace NEO;
using EncodeMiFlushDWTest = testing::Test;
// With commandWithPostSync == false the first MI_FLUSH_DW found in the stream
// must carry the NO_WRITE post-sync operation.
HWTEST_F(EncodeMiFlushDWTest, GivenLinearStreamWhenCllaedEncodeWithNoPostSyncThenPostSyncNotWriteIsSet) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    // Stack buffer that backs the linear stream.
    uint32_t pCmdBuffer[1024];
    MockGraphicsAllocation gfxAllocation(static_cast<void *>(pCmdBuffer), sizeof(pCmdBuffer));
    LinearStream stream(&gfxAllocation);

    EncodeMiFlushDW<FamilyType>::programMiFlushDw(stream, 0, 0, false, false);

    GenCmdList commands;
    // A failed parse would make the checks below meaningless, so stop here.
    ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(commands, stream.getCpuBase(), stream.getUsed()));

    auto itor = find<MI_FLUSH_DW *>(commands.begin(), commands.end());
    ASSERT_NE(itor, commands.end());

    auto cmd = genCmdCast<MI_FLUSH_DW *>(*itor);
    // Guard the dereference in case the cast does not yield a command.
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_NO_WRITE);
}
// With commandWithPostSync == true the MI_FLUSH_DW that carries a post-sync
// operation must hold the requested destination address and immediate data.
HWTEST_F(EncodeMiFlushDWTest, GivenLinearStreamWhenCllaedEncodeWithPostSyncDataThenPostSyncDataIsSet) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    // Stack buffer that backs the linear stream.
    uint32_t pCmdBuffer[1024];
    MockGraphicsAllocation gfxAllocation(static_cast<void *>(pCmdBuffer), sizeof(pCmdBuffer));
    LinearStream stream(&gfxAllocation);

    uint64_t address = 0x1000;
    uint64_t data = 0x4321;
    EncodeMiFlushDW<FamilyType>::programMiFlushDw(stream, address, data, false, true);

    GenCmdList commands;
    // A failed parse would make the checks below meaningless, so stop here.
    ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(commands, stream.getCpuBase(), stream.getUsed()));

    auto itor = find<MI_FLUSH_DW *>(commands.begin(), commands.end());
    ASSERT_NE(itor, commands.end());

    // Scan from the first MI_FLUSH_DW onwards for the one with a post-sync
    // write; flushes without one (NO_WRITE) are skipped.
    bool miFlushWithPostSyncFound = false;
    for (; itor != commands.end(); ++itor) {
        auto cmd = genCmdCast<MI_FLUSH_DW *>(*itor);
        // The scan visits every remaining command; presumably the cast yields
        // nullptr for other command types (TODO confirm), so never dereference
        // it unchecked.
        if (cmd == nullptr) {
            continue;
        }
        if (cmd->getPostSyncOperation() != MI_FLUSH_DW::POST_SYNC_OPERATION_NO_WRITE) {
            EXPECT_EQ(cmd->getDestinationAddress(), address);
            EXPECT_EQ(cmd->getImmediateData(), data);
            miFlushWithPostSyncFound = true;
            break;
        }
    }
    EXPECT_TRUE(miFlushWithPostSyncFound);
}
// With timeStampOperation == false the post-sync MI_FLUSH_DW must use the
// WRITE_IMMEDIATE_DATA_QWORD operation.
HWTEST_F(EncodeMiFlushDWTest, GivenLinearStreamWhenCllaedEncodeWithTimestampFaslseThenPostSyncDataTypeIsSet) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    // Stack buffer that backs the linear stream.
    uint32_t pCmdBuffer[1024];
    MockGraphicsAllocation gfxAllocation(static_cast<void *>(pCmdBuffer), sizeof(pCmdBuffer));
    LinearStream stream(&gfxAllocation);

    uint64_t address = 0x1000;
    uint64_t data = 0x4321;
    EncodeMiFlushDW<FamilyType>::programMiFlushDw(stream, address, data, false, true);

    GenCmdList commands;
    // A failed parse would make the checks below meaningless, so stop here.
    ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(commands, stream.getCpuBase(), stream.getUsed()));

    auto itor = find<MI_FLUSH_DW *>(commands.begin(), commands.end());
    ASSERT_NE(itor, commands.end());

    // Scan from the first MI_FLUSH_DW onwards for the one with a post-sync
    // write; flushes without one (NO_WRITE) are skipped.
    bool miFlushWithPostSyncFound = false;
    for (; itor != commands.end(); ++itor) {
        auto cmd = genCmdCast<MI_FLUSH_DW *>(*itor);
        // The scan visits every remaining command; presumably the cast yields
        // nullptr for other command types (TODO confirm), so never dereference
        // it unchecked.
        if (cmd == nullptr) {
            continue;
        }
        if (cmd->getPostSyncOperation() != MI_FLUSH_DW::POST_SYNC_OPERATION_NO_WRITE) {
            EXPECT_EQ(cmd->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD);
            miFlushWithPostSyncFound = true;
            break;
        }
    }
    EXPECT_TRUE(miFlushWithPostSyncFound);
}
// With timeStampOperation == true the post-sync MI_FLUSH_DW must use the
// WRITE_TIMESTAMP_REGISTER operation.
HWTEST_F(EncodeMiFlushDWTest, GivenLinearStreamWhenCllaedEncodeWithTimestampTrueThenPostSyncDataTypeIsSet) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    // Stack buffer that backs the linear stream.
    uint32_t pCmdBuffer[1024];
    MockGraphicsAllocation gfxAllocation(static_cast<void *>(pCmdBuffer), sizeof(pCmdBuffer));
    LinearStream stream(&gfxAllocation);

    uint64_t address = 0x1000;
    uint64_t data = 0x4321;
    EncodeMiFlushDW<FamilyType>::programMiFlushDw(stream, address, data, true, true);

    GenCmdList commands;
    // A failed parse would make the checks below meaningless, so stop here.
    ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(commands, stream.getCpuBase(), stream.getUsed()));

    auto itor = find<MI_FLUSH_DW *>(commands.begin(), commands.end());
    ASSERT_NE(itor, commands.end());

    // Scan from the first MI_FLUSH_DW onwards for the one with a post-sync
    // write; flushes without one (NO_WRITE) are skipped.
    bool miFlushWithPostSyncFound = false;
    for (; itor != commands.end(); ++itor) {
        auto cmd = genCmdCast<MI_FLUSH_DW *>(*itor);
        // The scan visits every remaining command; presumably the cast yields
        // nullptr for other command types (TODO confirm), so never dereference
        // it unchecked.
        if (cmd == nullptr) {
            continue;
        }
        if (cmd->getPostSyncOperation() != MI_FLUSH_DW::POST_SYNC_OPERATION_NO_WRITE) {
            EXPECT_EQ(cmd->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_TIMESTAMP_REGISTER);
            miFlushWithPostSyncFound = true;
            break;
        }
    }
    EXPECT_TRUE(miFlushWithPostSyncFound);
}

View File

@@ -5,6 +5,7 @@
# #
set(NEO_CORE_HELPERS_TESTS set(NEO_CORE_HELPERS_TESTS
${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/debug_manager_state_restore.h ${CMAKE_CURRENT_SOURCE_DIR}/debug_manager_state_restore.h
${CMAKE_CURRENT_SOURCE_DIR}/default_hw_info.h ${CMAKE_CURRENT_SOURCE_DIR}/default_hw_info.h

View File

@@ -0,0 +1,73 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/blit_commands_helper.h"
#include "opencl/test/unit_test/mocks/mock_graphics_allocation.h"
#include "gtest/gtest.h"
using namespace NEO;
// constructPropertiesForCopyBuffer() must copy every input (allocations, GPU
// addresses, offsets, size and pitches) into the returned BlitProperties and
// mark the direction as buffer-to-buffer.
TEST(BlitCommandsHelperTest, GivenBufferParamsWhenConstructingPropertiesForBufferRegionsThenPropertiesCreatedCorrectly) {
    // Host storage and GPU addresses behind the two mock allocations.
    uint32_t src[] = {1, 2, 3, 4};
    uint32_t dst[] = {4, 3, 2, 1};
    uint64_t srcGpuAddr = 0x12345;
    uint64_t dstGpuAddr = 0x54321;
    auto sourceAllocation = std::make_unique<MockGraphicsAllocation>(src, srcGpuAddr, sizeof(src));
    auto destinationAllocation = std::make_unique<MockGraphicsAllocation>(dst, dstGpuAddr, sizeof(dst));

    Vec3<size_t> srcOffsets{1, 2, 3};
    Vec3<size_t> dstOffsets{3, 2, 1};
    Vec3<size_t> copySize{2, 2, 2};

    size_t srcRowPitch = 2;
    size_t srcSlicePitch = 3;
    size_t dstRowPitch = 2;
    size_t dstSlicePitch = 3;

    auto properties = NEO::BlitProperties::constructPropertiesForCopyBuffer(
        destinationAllocation.get(), sourceAllocation.get(),
        dstOffsets, srcOffsets, copySize,
        srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch);

    EXPECT_EQ(properties.blitDirection, BlitterConstants::BlitDirection::BufferToBuffer);
    EXPECT_EQ(properties.dstAllocation, destinationAllocation.get());
    EXPECT_EQ(properties.srcAllocation, sourceAllocation.get());
    EXPECT_EQ(properties.dstGpuAddress, dstGpuAddr);
    EXPECT_EQ(properties.srcGpuAddress, srcGpuAddr);
    EXPECT_EQ(properties.copySize, copySize);
    EXPECT_EQ(properties.dstOffset, dstOffsets);
    EXPECT_EQ(properties.srcOffset, srcOffsets);
    EXPECT_EQ(properties.dstRowPitch, dstRowPitch);
    EXPECT_EQ(properties.dstSlicePitch, dstSlicePitch);
    EXPECT_EQ(properties.srcRowPitch, srcRowPitch);
    EXPECT_EQ(properties.srcSlicePitch, srcSlicePitch);
}
// A copy size whose Y and Z components are 0 must come back with both of them
// set to 1, while X is kept unchanged.
TEST(BlitCommandsHelperTest, GivenCopySizeYAndZEqual0WhenConstructingPropertiesForBufferRegionsThenCopyZAndZEqual1) {
    // Host storage and GPU addresses behind the two mock allocations.
    uint32_t src[] = {1, 2, 3, 4};
    uint32_t dst[] = {4, 3, 2, 1};
    uint64_t srcGpuAddr = 0x12345;
    uint64_t dstGpuAddr = 0x54321;
    auto sourceAllocation = std::make_unique<MockGraphicsAllocation>(src, srcGpuAddr, sizeof(src));
    auto destinationAllocation = std::make_unique<MockGraphicsAllocation>(dst, dstGpuAddr, sizeof(dst));

    Vec3<size_t> srcOffsets{1, 2, 3};
    Vec3<size_t> dstOffsets{3, 2, 1};
    // Only X is non-zero in the requested size.
    Vec3<size_t> copySize{2, 0, 0};

    size_t srcRowPitch = 2;
    size_t srcSlicePitch = 3;
    size_t dstRowPitch = 2;
    size_t dstSlicePitch = 3;

    auto properties = NEO::BlitProperties::constructPropertiesForCopyBuffer(
        destinationAllocation.get(), sourceAllocation.get(),
        dstOffsets, srcOffsets, copySize,
        srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch);

    Vec3<size_t> expectedSize{copySize.x, 1, 1};
    EXPECT_EQ(properties.copySize, expectedSize);
}