mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
Add blit copy implementation for L0
Change-Id: I327a4cf977e166cb648ee9f3a79374f7cefa7b1b Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
3ce0450a9c
commit
691a4ea823
@@ -77,9 +77,12 @@ void CommandList::eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocati
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool CommandList::isCopyOnly() const {
|
||||||
|
return isCopyOnlyCmdList;
|
||||||
|
}
|
||||||
|
|
||||||
NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *kernel) {
|
NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *kernel) {
|
||||||
auto functionAttributes = kernel->getImmutableData()->getDescriptor().kernelAttributes;
|
auto functionAttributes = kernel->getImmutableData()->getDescriptor().kernelAttributes;
|
||||||
|
|
||||||
NEO::PreemptionFlags flags = {};
|
NEO::PreemptionFlags flags = {};
|
||||||
flags.flags.disabledMidThreadPreemptionKernel = functionAttributes.flags.requiresDisabledMidThreadPreemption;
|
flags.flags.disabledMidThreadPreemptionKernel = functionAttributes.flags.requiresDisabledMidThreadPreemption;
|
||||||
flags.flags.usesFencesForReadWriteImages = functionAttributes.flags.usesFencesForReadWriteImages;
|
flags.flags.usesFencesForReadWriteImages = functionAttributes.flags.usesFencesForReadWriteImages;
|
||||||
|
|||||||
@@ -117,10 +117,10 @@ struct CommandList : _ze_command_list_handle_t {
|
|||||||
virtual ze_result_t appendMIBBEnd() = 0;
|
virtual ze_result_t appendMIBBEnd() = 0;
|
||||||
virtual ze_result_t appendMINoop() = 0;
|
virtual ze_result_t appendMINoop() = 0;
|
||||||
|
|
||||||
static CommandList *create(uint32_t productFamily, Device *device);
|
static CommandList *create(uint32_t productFamily, Device *device, bool isCopyOnly);
|
||||||
static CommandList *createImmediate(uint32_t productFamily, Device *device,
|
static CommandList *createImmediate(uint32_t productFamily, Device *device,
|
||||||
const ze_command_queue_desc_t *desc,
|
const ze_command_queue_desc_t *desc,
|
||||||
bool internalUsage);
|
bool internalUsage, bool isCopyOnly);
|
||||||
|
|
||||||
static CommandList *fromHandle(ze_command_list_handle_t handle) {
|
static CommandList *fromHandle(ze_command_list_handle_t handle) {
|
||||||
return static_cast<CommandList *>(handle);
|
return static_cast<CommandList *>(handle);
|
||||||
@@ -147,6 +147,7 @@ struct CommandList : _ze_command_list_handle_t {
|
|||||||
void removeHostPtrAllocations();
|
void removeHostPtrAllocations();
|
||||||
void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation);
|
void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation);
|
||||||
void eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation);
|
void eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation);
|
||||||
|
bool isCopyOnly() const;
|
||||||
|
|
||||||
enum CommandListType : uint32_t {
|
enum CommandListType : uint32_t {
|
||||||
TYPE_REGULAR = 0u,
|
TYPE_REGULAR = 0u,
|
||||||
@@ -161,7 +162,7 @@ struct CommandList : _ze_command_list_handle_t {
|
|||||||
std::vector<Kernel *> printfFunctionContainer;
|
std::vector<Kernel *> printfFunctionContainer;
|
||||||
|
|
||||||
virtual ze_result_t executeCommandListImmediate(bool performMigration) = 0;
|
virtual ze_result_t executeCommandListImmediate(bool performMigration) = 0;
|
||||||
virtual bool initialize(Device *device) = 0;
|
virtual bool initialize(Device *device, bool isCopyOnly) = 0;
|
||||||
virtual ~CommandList();
|
virtual ~CommandList();
|
||||||
NEO::CommandContainer commandContainer;
|
NEO::CommandContainer commandContainer;
|
||||||
|
|
||||||
@@ -169,6 +170,7 @@ struct CommandList : _ze_command_list_handle_t {
|
|||||||
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
|
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
|
||||||
uint32_t commandListPerThreadScratchSize = 0u;
|
uint32_t commandListPerThreadScratchSize = 0u;
|
||||||
NEO::PreemptionMode commandListPreemptionMode = NEO::PreemptionMode::Initial;
|
NEO::PreemptionMode commandListPreemptionMode = NEO::PreemptionMode::Initial;
|
||||||
|
bool isCopyOnlyCmdList = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
using CommandListAllocatorFn = CommandList *(*)(uint32_t);
|
using CommandListAllocatorFn = CommandList *(*)(uint32_t);
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ struct CommandListCoreFamily : CommandListImp {
|
|||||||
|
|
||||||
using CommandListImp::CommandListImp;
|
using CommandListImp::CommandListImp;
|
||||||
|
|
||||||
bool initialize(Device *device) override;
|
bool initialize(Device *device, bool isCopyOnly) override;
|
||||||
virtual void programL3(bool isSLMused);
|
virtual void programL3(bool isSLMused);
|
||||||
|
|
||||||
ze_result_t close() override;
|
ze_result_t close() override;
|
||||||
@@ -124,26 +124,39 @@ struct CommandListCoreFamily : CommandListImp {
|
|||||||
ze_result_t executeCommandListImmediate(bool performMigration) override;
|
ze_result_t executeCommandListImmediate(bool performMigration) override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc,
|
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc,
|
||||||
uint64_t dstOffset, void *srcPtr,
|
uint64_t dstOffset, void *srcPtr,
|
||||||
NEO::GraphicsAllocation *srcPtrAlloc,
|
NEO::GraphicsAllocation *srcPtrAlloc,
|
||||||
uint64_t srcOffset, uint32_t size,
|
uint64_t srcOffset, uint32_t size,
|
||||||
uint32_t elementSize, Builtin builtin);
|
uint32_t elementSize, Builtin builtin);
|
||||||
|
|
||||||
ze_result_t appendMemoryCopyKernel2d(const void *dstptr, const void *srcptr,
|
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
|
||||||
Builtin builtin, const ze_copy_region_t *dstRegion,
|
uint64_t dstOffset,
|
||||||
uint32_t dstPitch, size_t dstOffset,
|
NEO::GraphicsAllocation *srcPtrAlloc,
|
||||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
uint64_t srcOffset, uint32_t size);
|
||||||
size_t srcOffset, ze_event_handle_t hSignalEvent,
|
|
||||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
|
|
||||||
|
|
||||||
ze_result_t appendMemoryCopyKernel3d(const void *dstptr, const void *srcptr,
|
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlitRegion(const void *srcptr,
|
||||||
Builtin builtin, const ze_copy_region_t *dstRegion,
|
const void *dstptr,
|
||||||
uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset,
|
ze_copy_region_t srcRegion,
|
||||||
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
|
||||||
uint32_t srcSlicePitch, size_t srcOffset,
|
size_t srcRowPitch, size_t srcSlicePitch,
|
||||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
size_t dstRowPitch, size_t dstSlicePitch,
|
||||||
ze_event_handle_t *phWaitEvents);
|
size_t srcSize, size_t dstSize);
|
||||||
|
|
||||||
|
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel2d(const void *dstptr, const void *srcptr,
|
||||||
|
Builtin builtin, const ze_copy_region_t *dstRegion,
|
||||||
|
uint32_t dstPitch, size_t dstOffset,
|
||||||
|
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||||
|
size_t srcOffset, ze_event_handle_t hSignalEvent,
|
||||||
|
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
|
||||||
|
|
||||||
|
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel3d(const void *dstptr, const void *srcptr,
|
||||||
|
Builtin builtin, const ze_copy_region_t *dstRegion,
|
||||||
|
uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset,
|
||||||
|
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||||
|
uint32_t srcSlicePitch, size_t srcOffset,
|
||||||
|
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||||
|
ze_event_handle_t *phWaitEvents);
|
||||||
|
|
||||||
ze_result_t appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
|
ze_result_t appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
|
||||||
const ze_group_count_t *pThreadGroupDimensions,
|
const ze_group_count_t *pThreadGroupDimensions,
|
||||||
@@ -160,7 +173,7 @@ struct CommandListCoreFamily : CommandListImp {
|
|||||||
void appendSignalEventPostWalker(ze_event_handle_t hEvent);
|
void appendSignalEventPostWalker(ze_event_handle_t hEvent);
|
||||||
|
|
||||||
uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region);
|
uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region);
|
||||||
AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize);
|
virtual AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize);
|
||||||
ze_result_t addEventsToCmdList(ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
|
ze_result_t addEventsToCmdList(ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,7 @@
|
|||||||
#include "shared/source/command_stream/linear_stream.h"
|
#include "shared/source/command_stream/linear_stream.h"
|
||||||
#include "shared/source/command_stream/preemption.h"
|
#include "shared/source/command_stream/preemption.h"
|
||||||
#include "shared/source/device/device.h"
|
#include "shared/source/device/device.h"
|
||||||
|
#include "shared/source/helpers/blit_commands_helper.h"
|
||||||
#include "shared/source/helpers/heap_helper.h"
|
#include "shared/source/helpers/heap_helper.h"
|
||||||
#include "shared/source/helpers/hw_helper.h"
|
#include "shared/source/helpers/hw_helper.h"
|
||||||
#include "shared/source/helpers/hw_info.h"
|
#include "shared/source/helpers/hw_info.h"
|
||||||
@@ -35,17 +36,19 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
|||||||
struct EncodeStateBaseAddress;
|
struct EncodeStateBaseAddress;
|
||||||
|
|
||||||
// NOTE: this span is a flattened side-by-side diff. Within each pair of code
// lines the first is the pre-change text and the second is the post-change
// text; rows containing only '|' are table-cell separators, not code.
//
// CommandListCoreFamily::initialize (post-change version):
//   - Initializes the command container with the NEO device taken from the
//     DeviceImp; returns false if that initialization fails.
//   - When isCopyOnly is false, encodes the state base address into the
//     container and calls setDirtyStateForAllHeaps(false). Both steps are
//     skipped for a copy-only list.
//   - Stores the device, the device preemption mode and the copy-only flag
//     on the command list, then returns true.
// The pre-change version took no isCopyOnly parameter and always performed
// the state-base-address encoding.
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
bool CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device) {
|
bool CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, bool isCopyOnly) {
|
||||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||||
|
|
||||||
if (!commandContainer.initialize(static_cast<DeviceImp *>(device)->neoDevice)) {
|
if (!commandContainer.initialize(static_cast<DeviceImp *>(device)->neoDevice)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
NEO::EncodeStateBaseAddress<GfxFamily>::encode(commandContainer);
|
if (!isCopyOnly) {
|
||||||
commandContainer.setDirtyStateForAllHeaps(false);
|
NEO::EncodeStateBaseAddress<GfxFamily>::encode(commandContainer);
|
||||||
|
commandContainer.setDirtyStateForAllHeaps(false);
|
||||||
|
}
|
||||||
this->device = device;
|
this->device = device;
|
||||||
this->commandListPreemptionMode = device->getDevicePreemptionMode();
|
this->commandListPreemptionMode = device->getDevicePreemptionMode();
|
||||||
|
this->isCopyOnlyCmdList = isCopyOnly;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -182,7 +185,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
|||||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), false);
|
if (isCopyOnlyCmdList) {
|
||||||
|
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false);
|
||||||
|
} else {
|
||||||
|
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), false);
|
||||||
|
}
|
||||||
|
|
||||||
if (hSignalEvent) {
|
if (hSignalEvent) {
|
||||||
this->appendSignalEventPostWalker(hSignalEvent);
|
this->appendSignalEventPostWalker(hSignalEvent);
|
||||||
@@ -565,6 +572,48 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(v
|
|||||||
nullptr, 0, nullptr);
|
nullptr, 0, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
|
||||||
|
uint64_t dstOffset,
|
||||||
|
NEO::GraphicsAllocation *srcPtrAlloc,
|
||||||
|
uint64_t srcOffset,
|
||||||
|
uint32_t size) {
|
||||||
|
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||||
|
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstPtrAlloc, srcPtrAlloc, {dstOffset, 0, 0}, {srcOffset, 0, 0}, {size, 0, 0}, 0, 0, 0, 0);
|
||||||
|
commandContainer.addToResidencyContainer(dstPtrAlloc);
|
||||||
|
commandContainer.addToResidencyContainer(srcPtrAlloc);
|
||||||
|
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
|
||||||
|
return ZE_RESULT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(const void *srcptr,
|
||||||
|
const void *dstptr,
|
||||||
|
ze_copy_region_t srcRegion,
|
||||||
|
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
|
||||||
|
size_t srcRowPitch, size_t srcSlicePitch,
|
||||||
|
size_t dstRowPitch, size_t dstSlicePitch,
|
||||||
|
size_t srcSize, size_t dstSize) {
|
||||||
|
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||||
|
|
||||||
|
auto dstAllocationStruct = getAlignedAllocation(this->device, dstptr, dstSize);
|
||||||
|
auto srcAllocationStruct = getAlignedAllocation(this->device, srcptr, srcSize);
|
||||||
|
|
||||||
|
auto srcPtrAlloc = srcAllocationStruct.alloc;
|
||||||
|
auto dstPtrAlloc = dstAllocationStruct.alloc;
|
||||||
|
|
||||||
|
Vec3<size_t> srcPtrOffset = {srcRegion.originX, srcRegion.originY, srcRegion.originZ};
|
||||||
|
Vec3<size_t> dstPtrOffset = {dstRegion.originX, dstRegion.originY, dstRegion.originZ};
|
||||||
|
|
||||||
|
auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstPtrAlloc, srcPtrAlloc,
|
||||||
|
dstPtrOffset, srcPtrOffset, copySize, srcRowPitch, srcSlicePitch,
|
||||||
|
dstRowPitch, dstSlicePitch);
|
||||||
|
commandContainer.addToResidencyContainer(dstPtrAlloc);
|
||||||
|
commandContainer.addToResidencyContainer(srcPtrAlloc);
|
||||||
|
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]);
|
||||||
|
return ZE_RESULT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(NEO::GraphicsAllocation *dstptr,
|
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(NEO::GraphicsAllocation *dstptr,
|
||||||
NEO::GraphicsAllocation *srcptr,
|
NEO::GraphicsAllocation *srcptr,
|
||||||
@@ -644,31 +693,37 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
|||||||
appendEventForProfiling(hSignalEvent, true);
|
appendEventForProfiling(hSignalEvent, true);
|
||||||
|
|
||||||
if (ret == ZE_RESULT_SUCCESS && leftSize) {
|
if (ret == ZE_RESULT_SUCCESS && leftSize) {
|
||||||
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, dstAllocationStruct.offset,
|
||||||
dstAllocationStruct.alloc, dstAllocationStruct.offset,
|
srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast<uint32_t>(leftSize))
|
||||||
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
||||||
srcAllocationStruct.alloc, srcAllocationStruct.offset,
|
dstAllocationStruct.alloc, dstAllocationStruct.offset,
|
||||||
static_cast<uint32_t>(leftSize), 1,
|
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
||||||
Builtin::CopyBufferToBufferSide);
|
srcAllocationStruct.alloc, srcAllocationStruct.offset,
|
||||||
|
static_cast<uint32_t>(leftSize), 1,
|
||||||
|
Builtin::CopyBufferToBufferSide);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) {
|
if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) {
|
||||||
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
|
||||||
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
|
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, static_cast<uint32_t>(middleSizeBytes))
|
||||||
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
||||||
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset,
|
dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset,
|
||||||
static_cast<uint32_t>(middleSizeBytes),
|
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
||||||
static_cast<uint32_t>(middleElSize),
|
srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset,
|
||||||
Builtin::CopyBufferToBufferMiddle);
|
static_cast<uint32_t>(middleSizeBytes),
|
||||||
|
static_cast<uint32_t>(middleElSize),
|
||||||
|
Builtin::CopyBufferToBufferMiddle);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ret == ZE_RESULT_SUCCESS && rightSize) {
|
if (ret == ZE_RESULT_SUCCESS && rightSize) {
|
||||||
ret = appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
|
||||||
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
|
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, static_cast<uint32_t>(rightSize))
|
||||||
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
: appendMemoryCopyKernelWithGA(reinterpret_cast<void *>(&dstAllocationStruct.alignedAllocationPtr),
|
||||||
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset,
|
dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset,
|
||||||
static_cast<uint32_t>(rightSize), 1u,
|
reinterpret_cast<void *>(&srcAllocationStruct.alignedAllocationPtr),
|
||||||
Builtin::CopyBufferToBufferSide);
|
srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset,
|
||||||
|
static_cast<uint32_t>(rightSize), 1u,
|
||||||
|
Builtin::CopyBufferToBufferSide);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hSignalEvent) {
|
if (hSignalEvent) {
|
||||||
@@ -743,13 +798,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
|||||||
|
|
||||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||||
if (srcRegion->depth > 1) {
|
if (srcRegion->depth > 1) {
|
||||||
result = this->appendMemoryCopyKernel3d(alignedDstPtr, alignedSrcPtr,
|
result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(alignedSrcPtr, alignedDstPtr, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth}, srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize)
|
||||||
Builtin::CopyBufferRectBytes3d, dstRegion, dstPitch, dstSlicePitch, dstOffset,
|
: this->appendMemoryCopyKernel3d(alignedDstPtr, alignedSrcPtr,
|
||||||
srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, 0, nullptr);
|
Builtin::CopyBufferRectBytes3d, dstRegion, dstPitch, dstSlicePitch, dstOffset,
|
||||||
|
srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, 0, nullptr);
|
||||||
} else {
|
} else {
|
||||||
result = this->appendMemoryCopyKernel2d(alignedDstPtr, alignedSrcPtr,
|
result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(alignedSrcPtr, alignedDstPtr, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth}, srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize)
|
||||||
Builtin::CopyBufferRectBytes2d, dstRegion, dstPitch, dstOffset,
|
: this->appendMemoryCopyKernel2d(alignedDstPtr, alignedSrcPtr,
|
||||||
srcRegion, srcPitch, srcOffset, hSignalEvent, 0, nullptr);
|
Builtin::CopyBufferRectBytes2d, dstRegion, dstPitch, dstOffset,
|
||||||
|
srcRegion, srcPitch, srcOffset, hSignalEvent, 0, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result) {
|
if (result) {
|
||||||
@@ -757,7 +814,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (hostPointerNeedsFlush) {
|
if (hostPointerNeedsFlush) {
|
||||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), true);
|
if (isCopyOnlyCmdList) {
|
||||||
|
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false);
|
||||||
|
} else {
|
||||||
|
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), true);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
@@ -1091,12 +1152,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
|
|||||||
auto event = Event::fromHandle(hEvent);
|
auto event = Event::fromHandle(hEvent);
|
||||||
|
|
||||||
commandContainer.addToResidencyContainer(&event->getAllocation());
|
commandContainer.addToResidencyContainer(&event->getAllocation());
|
||||||
|
if (isCopyOnlyCmdList) {
|
||||||
bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
|
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), event->getGpuAddress(), Event::STATE_SIGNALED, false, true);
|
||||||
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
|
} else {
|
||||||
*commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
|
||||||
event->getGpuAddress(), Event::STATE_SIGNALED, dcFlushEnable, commandContainer.getDevice()->getHardwareInfo());
|
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
|
||||||
|
*commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||||
|
event->getGpuAddress(), Event::STATE_SIGNALED, dcFlushEnable, commandContainer.getDevice()->getHardwareInfo());
|
||||||
|
}
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1127,7 +1190,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
|||||||
|
|
||||||
bool dcFlushEnable = (event->waitScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
|
bool dcFlushEnable = (event->waitScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
|
||||||
if (dcFlushEnable) {
|
if (dcFlushEnable) {
|
||||||
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), true);
|
if (isCopyOnlyCmdList) {
|
||||||
|
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false);
|
||||||
|
} else {
|
||||||
|
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), true);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -101,19 +101,22 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_hand
|
|||||||
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_START);
|
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_START);
|
||||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress);
|
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress);
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END);
|
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END);
|
||||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress);
|
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress);
|
||||||
|
|
||||||
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_END);
|
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_END);
|
||||||
bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
|
bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
|
||||||
|
|
||||||
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
|
if (isCopyOnlyCmdList) {
|
||||||
*(commandContainer.getCommandStream()), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP,
|
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), timeStampAddress, 0llu, true, true);
|
||||||
timeStampAddress,
|
} else {
|
||||||
0llu,
|
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
|
||||||
dcFlushEnable,
|
*(commandContainer.getCommandStream()), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP,
|
||||||
device->getHwInfo());
|
timeStampAddress,
|
||||||
|
0llu,
|
||||||
|
dcFlushEnable,
|
||||||
|
device->getHwInfo());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
@@ -10,6 +10,7 @@
|
|||||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||||
#include "shared/source/command_stream/linear_stream.h"
|
#include "shared/source/command_stream/linear_stream.h"
|
||||||
#include "shared/source/device/device.h"
|
#include "shared/source/device/device.h"
|
||||||
|
#include "shared/source/helpers/engine_node_helper.h"
|
||||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||||
|
|
||||||
#include "level_zero/core/source/device/device.h"
|
#include "level_zero/core/source/device/device.h"
|
||||||
@@ -48,7 +49,7 @@ ze_result_t CommandListImp::appendMetricQueryEnd(zet_metric_query_handle_t hMetr
|
|||||||
return MetricQuery::fromHandle(hMetricQuery)->appendEnd(*this, hCompletionEvent);
|
return MetricQuery::fromHandle(hMetricQuery)->appendEnd(*this, hCompletionEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
CommandList *CommandList::create(uint32_t productFamily, Device *device) {
|
CommandList *CommandList::create(uint32_t productFamily, Device *device, bool isCopyOnly) {
|
||||||
CommandListAllocatorFn allocator = nullptr;
|
CommandListAllocatorFn allocator = nullptr;
|
||||||
if (productFamily < IGFX_MAX_PRODUCT) {
|
if (productFamily < IGFX_MAX_PRODUCT) {
|
||||||
allocator = commandListFactory[productFamily];
|
allocator = commandListFactory[productFamily];
|
||||||
@@ -58,24 +59,27 @@ CommandList *CommandList::create(uint32_t productFamily, Device *device) {
|
|||||||
if (allocator) {
|
if (allocator) {
|
||||||
commandList = static_cast<CommandListImp *>((*allocator)(CommandList::defaultNumIddsPerBlock));
|
commandList = static_cast<CommandListImp *>((*allocator)(CommandList::defaultNumIddsPerBlock));
|
||||||
|
|
||||||
commandList->initialize(device);
|
commandList->initialize(device, isCopyOnly);
|
||||||
}
|
}
|
||||||
return commandList;
|
return commandList;
|
||||||
}
|
}
|
||||||
|
|
||||||
CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device,
|
CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device,
|
||||||
const ze_command_queue_desc_t *desc,
|
const ze_command_queue_desc_t *desc,
|
||||||
bool internalUsage) {
|
bool internalUsage, bool isCopyOnly) {
|
||||||
|
|
||||||
auto deviceImp = static_cast<DeviceImp *>(device);
|
auto deviceImp = static_cast<DeviceImp *>(device);
|
||||||
NEO::CommandStreamReceiver *csr = nullptr;
|
NEO::CommandStreamReceiver *csr = nullptr;
|
||||||
if (internalUsage) {
|
if (internalUsage) {
|
||||||
csr = deviceImp->neoDevice->getInternalEngine().commandStreamReceiver;
|
csr = deviceImp->neoDevice->getInternalEngine().commandStreamReceiver;
|
||||||
|
} else if (isCopyOnly) {
|
||||||
|
auto &selectorCopyEngine = deviceImp->neoDevice->getDeviceById(0)->getSelectorCopyEngine();
|
||||||
|
csr = deviceImp->neoDevice->getDeviceById(0)->getEngine(NEO::EngineHelpers::getBcsEngineType(deviceImp->neoDevice->getHardwareInfo(), selectorCopyEngine), false).commandStreamReceiver;
|
||||||
} else {
|
} else {
|
||||||
csr = deviceImp->neoDevice->getDefaultEngine().commandStreamReceiver;
|
csr = deviceImp->neoDevice->getDefaultEngine().commandStreamReceiver;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto commandQueue = CommandQueue::create(productFamily, device, csr, desc);
|
auto commandQueue = CommandQueue::create(productFamily, device, csr, desc, isCopyOnly);
|
||||||
if (!commandQueue) {
|
if (!commandQueue) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@@ -89,7 +93,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
|
|||||||
if (allocator) {
|
if (allocator) {
|
||||||
commandList = static_cast<CommandListImp *>((*allocator)(CommandList::commandListimmediateIddsPerBlock));
|
commandList = static_cast<CommandListImp *>((*allocator)(CommandList::commandListimmediateIddsPerBlock));
|
||||||
|
|
||||||
commandList->initialize(device);
|
commandList->initialize(device, isCopyOnly);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!commandList) {
|
if (!commandList) {
|
||||||
|
|||||||
@@ -28,13 +28,14 @@ ze_result_t CommandQueueImp::destroy() {
|
|||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: this span is a flattened side-by-side diff. Within each pair of code
// lines the first is the pre-change text and the second is the post-change
// text; rows containing only '|' are table-cell separators, not code.
//
// CommandQueueImp::initialize (post-change version):
//   - Initializes the queue's command buffers for `device` with
//     totalCmdBufferSize and fetches the current buffer allocation.
//   - Creates the queue's LinearStream over that allocation's underlying
//     buffer with defaultQueueCmdBufferSize, aborts via UNRECOVERABLE_IF if
//     the stream pointer is null, and attaches the allocation to the stream.
//   - Records the copyOnly argument in isCopyOnlyCommandQueue.
// The pre-change version took no parameter and did not record a copy-only flag.
void CommandQueueImp::initialize() {
|
void CommandQueueImp::initialize(bool copyOnly) {
|
||||||
buffers.initialize(device, totalCmdBufferSize);
|
buffers.initialize(device, totalCmdBufferSize);
|
||||||
NEO::GraphicsAllocation *bufferAllocation = buffers.getCurrentBufferAllocation();
|
NEO::GraphicsAllocation *bufferAllocation = buffers.getCurrentBufferAllocation();
|
||||||
commandStream = new NEO::LinearStream(bufferAllocation->getUnderlyingBuffer(),
|
commandStream = new NEO::LinearStream(bufferAllocation->getUnderlyingBuffer(),
|
||||||
defaultQueueCmdBufferSize);
|
defaultQueueCmdBufferSize);
|
||||||
UNRECOVERABLE_IF(commandStream == nullptr);
|
UNRECOVERABLE_IF(commandStream == nullptr);
|
||||||
commandStream->replaceGraphicsAllocation(bufferAllocation);
|
commandStream->replaceGraphicsAllocation(bufferAllocation);
|
||||||
|
isCopyOnlyCommandQueue = copyOnly;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CommandQueueImp::reserveLinearStreamSize(size_t size) {
|
void CommandQueueImp::reserveLinearStreamSize(size_t size) {
|
||||||
@@ -91,7 +92,7 @@ void CommandQueueImp::printFunctionsPrintfOutput() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr,
|
CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr,
|
||||||
const ze_command_queue_desc_t *desc) {
|
const ze_command_queue_desc_t *desc, bool isCopyOnly) {
|
||||||
CommandQueueAllocatorFn allocator = nullptr;
|
CommandQueueAllocatorFn allocator = nullptr;
|
||||||
if (productFamily < IGFX_MAX_PRODUCT) {
|
if (productFamily < IGFX_MAX_PRODUCT) {
|
||||||
allocator = commandQueueFactory[productFamily];
|
allocator = commandQueueFactory[productFamily];
|
||||||
@@ -101,7 +102,7 @@ CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::
|
|||||||
if (allocator) {
|
if (allocator) {
|
||||||
commandQueue = static_cast<CommandQueueImp *>((*allocator)(device, csr, desc));
|
commandQueue = static_cast<CommandQueueImp *>((*allocator)(device, csr, desc));
|
||||||
|
|
||||||
commandQueue->initialize();
|
commandQueue->initialize(isCopyOnly);
|
||||||
}
|
}
|
||||||
return commandQueue;
|
return commandQueue;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ struct CommandQueue : _ze_command_queue_handle_t {
|
|||||||
virtual ze_result_t synchronize(uint32_t timeout) = 0;
|
virtual ze_result_t synchronize(uint32_t timeout) = 0;
|
||||||
|
|
||||||
static CommandQueue *create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr,
|
static CommandQueue *create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr,
|
||||||
const ze_command_queue_desc_t *desc);
|
const ze_command_queue_desc_t *desc, bool isCopyOnly);
|
||||||
|
|
||||||
static CommandQueue *fromHandle(ze_command_queue_handle_t handle) {
|
static CommandQueue *fromHandle(ze_command_queue_handle_t handle) {
|
||||||
return static_cast<CommandQueue *>(handle);
|
return static_cast<CommandQueue *>(handle);
|
||||||
@@ -58,6 +58,7 @@ struct CommandQueue : _ze_command_queue_handle_t {
|
|||||||
std::atomic<uint32_t> commandQueuePerThreadScratchSize;
|
std::atomic<uint32_t> commandQueuePerThreadScratchSize;
|
||||||
NEO::PreemptionMode commandQueuePreemptionMode = NEO::PreemptionMode::Initial;
|
NEO::PreemptionMode commandQueuePreemptionMode = NEO::PreemptionMode::Initial;
|
||||||
bool commandQueueDebugCmdsProgrammed = false;
|
bool commandQueueDebugCmdsProgrammed = false;
|
||||||
|
bool isCopyOnlyCommandQueue = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr,
|
using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr,
|
||||||
|
|||||||
@@ -62,6 +62,13 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
|||||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||||
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||||
|
|
||||||
|
for (auto i = 0u; i < numCommandLists; i++) {
|
||||||
|
auto commandList = CommandList::fromHandle(phCommandLists[i]);
|
||||||
|
if (isCopyOnlyCommandQueue != commandList->isCopyOnly()) {
|
||||||
|
return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
size_t spaceForResidency = 0;
|
size_t spaceForResidency = 0;
|
||||||
size_t preemptionSize = 0u;
|
size_t preemptionSize = 0u;
|
||||||
size_t debuggerCmdsSize = 0;
|
size_t debuggerCmdsSize = 0;
|
||||||
@@ -122,7 +129,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
|||||||
if (hFence) {
|
if (hFence) {
|
||||||
fence = Fence::fromHandle(hFence);
|
fence = Fence::fromHandle(hFence);
|
||||||
spaceForResidency += residencyContainerSpaceForFence;
|
spaceForResidency += residencyContainerSpaceForFence;
|
||||||
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo());
|
linearStreamSizeEstimate += isCopyOnlyCommandQueue ? NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite() : NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo());
|
||||||
}
|
}
|
||||||
|
|
||||||
spaceForResidency += residencyContainerSpaceForTagWrite;
|
spaceForResidency += residencyContainerSpaceForTagWrite;
|
||||||
@@ -138,70 +145,72 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
|||||||
|
|
||||||
gsbaStateDirty |= !gsbaInit;
|
gsbaStateDirty |= !gsbaInit;
|
||||||
frontEndStateDirty |= !frontEndInit;
|
frontEndStateDirty |= !frontEndInit;
|
||||||
|
if (!isCopyOnlyCommandQueue) {
|
||||||
|
|
||||||
if (!gpgpuEnabled) {
|
if (!gpgpuEnabled) {
|
||||||
linearStreamSizeEstimate += estimatePipelineSelect();
|
linearStreamSizeEstimate += estimatePipelineSelect();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (frontEndStateDirty) {
|
||||||
|
linearStreamSizeEstimate += estimateFrontEndCmdSize();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gsbaStateDirty) {
|
||||||
|
linearStreamSizeEstimate += estimateStateBaseAddressCmdSize();
|
||||||
|
}
|
||||||
|
|
||||||
|
linearStreamSizeEstimate += preemptionSize + debuggerCmdsSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (frontEndStateDirty) {
|
linearStreamSizeEstimate += isCopyOnlyCommandQueue ? NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite() : NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo());
|
||||||
linearStreamSizeEstimate += estimateFrontEndCmdSize();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (gsbaStateDirty) {
|
|
||||||
linearStreamSizeEstimate += estimateStateBaseAddressCmdSize();
|
|
||||||
}
|
|
||||||
|
|
||||||
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo());
|
|
||||||
|
|
||||||
linearStreamSizeEstimate += preemptionSize + debuggerCmdsSize;
|
|
||||||
|
|
||||||
size_t alignedSize = alignUp<size_t>(linearStreamSizeEstimate, minCmdBufferPtrAlign);
|
size_t alignedSize = alignUp<size_t>(linearStreamSizeEstimate, minCmdBufferPtrAlign);
|
||||||
size_t padding = alignedSize - linearStreamSizeEstimate;
|
size_t padding = alignedSize - linearStreamSizeEstimate;
|
||||||
reserveLinearStreamSize(alignedSize);
|
reserveLinearStreamSize(alignedSize);
|
||||||
NEO::LinearStream child(commandStream->getSpace(alignedSize), alignedSize);
|
NEO::LinearStream child(commandStream->getSpace(alignedSize), alignedSize);
|
||||||
|
|
||||||
if (!gpgpuEnabled) {
|
if (!isCopyOnlyCommandQueue) {
|
||||||
programPipelineSelect(child);
|
if (!gpgpuEnabled) {
|
||||||
}
|
programPipelineSelect(child);
|
||||||
|
}
|
||||||
|
|
||||||
if (!commandQueueDebugCmdsProgrammed && neoDevice->isDebuggerActive()) {
|
if (!commandQueueDebugCmdsProgrammed && neoDevice->isDebuggerActive()) {
|
||||||
NEO::PreambleHelper<GfxFamily>::programKernelDebugging(&child);
|
NEO::PreambleHelper<GfxFamily>::programKernelDebugging(&child);
|
||||||
commandQueueDebugCmdsProgrammed = true;
|
commandQueueDebugCmdsProgrammed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (frontEndStateDirty) {
|
if (frontEndStateDirty) {
|
||||||
programFrontEnd(scratchSpaceController->getScratchPatchAddress(), child);
|
programFrontEnd(scratchSpaceController->getScratchPatchAddress(), child);
|
||||||
}
|
}
|
||||||
if (gsbaStateDirty) {
|
if (gsbaStateDirty) {
|
||||||
programGeneralStateBaseAddress(scratchSpaceController->calculateNewGSH(), child);
|
programGeneralStateBaseAddress(scratchSpaceController->calculateNewGSH(), child);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (commandQueuePreemptionMode == NEO::PreemptionMode::Initial) {
|
if (commandQueuePreemptionMode == NEO::PreemptionMode::Initial) {
|
||||||
NEO::PreemptionHelper::programCsrBaseAddress<GfxFamily>(child, *neoDevice, csr->getPreemptionAllocation());
|
NEO::PreemptionHelper::programCsrBaseAddress<GfxFamily>(child, *neoDevice, csr->getPreemptionAllocation());
|
||||||
NEO::PreemptionHelper::programStateSip<GfxFamily>(child, *neoDevice);
|
NEO::PreemptionHelper::programStateSip<GfxFamily>(child, *neoDevice);
|
||||||
NEO::PreemptionHelper::programCmdStream<GfxFamily>(child,
|
NEO::PreemptionHelper::programCmdStream<GfxFamily>(child,
|
||||||
devicePreemption,
|
devicePreemption,
|
||||||
commandQueuePreemptionMode,
|
commandQueuePreemptionMode,
|
||||||
csr->getPreemptionAllocation());
|
csr->getPreemptionAllocation());
|
||||||
commandQueuePreemptionMode = devicePreemption;
|
commandQueuePreemptionMode = devicePreemption;
|
||||||
statePreemption = commandQueuePreemptionMode;
|
statePreemption = commandQueuePreemptionMode;
|
||||||
}
|
}
|
||||||
|
|
||||||
const bool sipKernelUsed = devicePreemption == NEO::PreemptionMode::MidThread ||
|
const bool sipKernelUsed = devicePreemption == NEO::PreemptionMode::MidThread ||
|
||||||
neoDevice->isDebuggerActive();
|
neoDevice->isDebuggerActive();
|
||||||
if (devicePreemption == NEO::PreemptionMode::MidThread) {
|
if (devicePreemption == NEO::PreemptionMode::MidThread) {
|
||||||
residencyContainer.push_back(csr->getPreemptionAllocation());
|
residencyContainer.push_back(csr->getPreemptionAllocation());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sipKernelUsed) {
|
if (sipKernelUsed) {
|
||||||
auto sipIsa = NEO::SipKernel::getSipKernelAllocation(*neoDevice);
|
auto sipIsa = NEO::SipKernel::getSipKernelAllocation(*neoDevice);
|
||||||
residencyContainer.push_back(sipIsa);
|
residencyContainer.push_back(sipIsa);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (neoDevice->isDebuggerActive()) {
|
if (neoDevice->isDebuggerActive()) {
|
||||||
residencyContainer.push_back(device->getDebugSurface());
|
residencyContainer.push_back(device->getDebugSurface());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto i = 0u; i < numCommandLists; ++i) {
|
for (auto i = 0u; i < numCommandLists; ++i) {
|
||||||
auto commandList = CommandList::fromHandle(phCommandLists[i]);
|
auto commandList = CommandList::fromHandle(phCommandLists[i]);
|
||||||
auto cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations();
|
auto cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations();
|
||||||
@@ -254,12 +263,17 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
|||||||
|
|
||||||
if (hFence) {
|
if (hFence) {
|
||||||
residencyContainer.push_back(&fence->getAllocation());
|
residencyContainer.push_back(&fence->getAllocation());
|
||||||
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
|
if (isCopyOnlyCommandQueue) {
|
||||||
child, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(child, fence->getGpuAddress(), Fence::STATE_SIGNALED, false, true);
|
||||||
fence->getGpuAddress(), Fence::STATE_SIGNALED, true, device->getHwInfo());
|
} else {
|
||||||
|
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
|
||||||
|
child, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||||
|
fence->getGpuAddress(), Fence::STATE_SIGNALED, true, device->getHwInfo());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dispatchTaskCountWrite(child, true);
|
dispatchTaskCountWrite(child, true);
|
||||||
|
|
||||||
residencyContainer.push_back(csr->getTagAllocation());
|
residencyContainer.push_back(csr->getTagAllocation());
|
||||||
void *endingCmd = nullptr;
|
void *endingCmd = nullptr;
|
||||||
if (directSubmissionEnabled) {
|
if (directSubmissionEnabled) {
|
||||||
@@ -334,8 +348,12 @@ void CommandQueueHw<gfxCoreFamily>::dispatchTaskCountWrite(NEO::LinearStream &co
|
|||||||
auto taskCountToWrite = csr->peekTaskCount() + 1;
|
auto taskCountToWrite = csr->peekTaskCount() + 1;
|
||||||
auto gpuAddress = static_cast<uint64_t>(csr->getTagAllocation()->getGpuAddress());
|
auto gpuAddress = static_cast<uint64_t>(csr->getTagAllocation()->getGpuAddress());
|
||||||
|
|
||||||
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
|
if (isCopyOnlyCommandQueue) {
|
||||||
commandStream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, gpuAddress, taskCountToWrite, false, true);
|
||||||
gpuAddress, taskCountToWrite, true, device->getHwInfo());
|
} else {
|
||||||
|
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
|
||||||
|
commandStream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||||
|
gpuAddress, taskCountToWrite, true, device->getHwInfo());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
@@ -68,7 +68,7 @@ struct CommandQueueImp : public CommandQueue {
|
|||||||
|
|
||||||
ze_result_t synchronize(uint32_t timeout) override;
|
ze_result_t synchronize(uint32_t timeout) override;
|
||||||
|
|
||||||
void initialize();
|
void initialize(bool copyOnly);
|
||||||
|
|
||||||
Device *getDevice() { return device; }
|
Device *getDevice() { return device; }
|
||||||
|
|
||||||
|
|||||||
@@ -15,6 +15,7 @@
|
|||||||
#include "shared/source/execution_environment/root_device_environment.h"
|
#include "shared/source/execution_environment/root_device_environment.h"
|
||||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||||
#include "shared/source/helpers/constants.h"
|
#include "shared/source/helpers/constants.h"
|
||||||
|
#include "shared/source/helpers/engine_node_helper.h"
|
||||||
#include "shared/source/helpers/hw_helper.h"
|
#include "shared/source/helpers/hw_helper.h"
|
||||||
#include "shared/source/helpers/string.h"
|
#include "shared/source/helpers/string.h"
|
||||||
#include "shared/source/kernel/grf_config.h"
|
#include "shared/source/kernel/grf_config.h"
|
||||||
@@ -77,7 +78,12 @@ ze_result_t DeviceImp::canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *
|
|||||||
ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc,
|
ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc,
|
||||||
ze_command_list_handle_t *commandList) {
|
ze_command_list_handle_t *commandList) {
|
||||||
auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
|
auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
|
||||||
*commandList = CommandList::create(productFamily, this);
|
bool useBliter = false;
|
||||||
|
auto ret = isCreatedCommandListCopyOnly(desc, &useBliter, ZE_COMMAND_LIST_FLAG_COPY_ONLY);
|
||||||
|
if (ret != ZE_RESULT_SUCCESS) {
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
*commandList = CommandList::create(productFamily, this, useBliter);
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
@@ -85,7 +91,14 @@ ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc,
|
|||||||
ze_result_t DeviceImp::createCommandListImmediate(const ze_command_queue_desc_t *desc,
|
ze_result_t DeviceImp::createCommandListImmediate(const ze_command_queue_desc_t *desc,
|
||||||
ze_command_list_handle_t *phCommandList) {
|
ze_command_list_handle_t *phCommandList) {
|
||||||
auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
|
auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
|
||||||
*phCommandList = CommandList::createImmediate(productFamily, this, desc, false);
|
|
||||||
|
bool useBliter = false;
|
||||||
|
auto ret = isCreatedCommandListCopyOnly(desc, &useBliter, ZE_COMMAND_QUEUE_FLAG_COPY_ONLY);
|
||||||
|
if (ret != ZE_RESULT_SUCCESS) {
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
*phCommandList = CommandList::createImmediate(productFamily, this, desc, false, useBliter);
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
@@ -94,9 +107,19 @@ ze_result_t DeviceImp::createCommandQueue(const ze_command_queue_desc_t *desc,
|
|||||||
ze_command_queue_handle_t *commandQueue) {
|
ze_command_queue_handle_t *commandQueue) {
|
||||||
auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
|
auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
|
||||||
|
|
||||||
auto csr = neoDevice->getDefaultEngine().commandStreamReceiver;
|
NEO::CommandStreamReceiver *csr = nullptr;
|
||||||
|
bool useBliter = false;
|
||||||
*commandQueue = CommandQueue::create(productFamily, this, csr, desc);
|
auto ret = isCreatedCommandListCopyOnly(desc, &useBliter, ZE_COMMAND_QUEUE_FLAG_COPY_ONLY);
|
||||||
|
if (ret != ZE_RESULT_SUCCESS) {
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
if (useBliter) {
|
||||||
|
auto &selectorCopyEngine = this->neoDevice->getDeviceById(0)->getSelectorCopyEngine();
|
||||||
|
csr = this->neoDevice->getDeviceById(0)->getEngine(NEO::EngineHelpers::getBcsEngineType(neoDevice->getHardwareInfo(), selectorCopyEngine), false).commandStreamReceiver;
|
||||||
|
} else {
|
||||||
|
csr = neoDevice->getDefaultEngine().commandStreamReceiver;
|
||||||
|
}
|
||||||
|
*commandQueue = CommandQueue::create(productFamily, this, csr, desc, useBliter);
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
@@ -515,7 +538,7 @@ ze_result_t DeviceImp::registerCLCommandQueue(cl_context context, cl_command_que
|
|||||||
|
|
||||||
auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
|
auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
|
||||||
auto csr = neoDevice->getDefaultEngine().commandStreamReceiver;
|
auto csr = neoDevice->getDefaultEngine().commandStreamReceiver;
|
||||||
*phCommandQueue = CommandQueue::create(productFamily, this, csr, &desc);
|
*phCommandQueue = CommandQueue::create(productFamily, this, csr, &desc, false);
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
@@ -574,7 +597,7 @@ Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice) {
|
|||||||
cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
|
cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
|
||||||
device->pageFaultCommandList =
|
device->pageFaultCommandList =
|
||||||
CommandList::createImmediate(
|
CommandList::createImmediate(
|
||||||
device->neoDevice->getHardwareInfo().platform.eProductFamily, device, &cmdQueueDesc, true);
|
device->neoDevice->getHardwareInfo().platform.eProductFamily, device, &cmdQueueDesc, true, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (neoDevice->getDeviceInfo().debuggerActive) {
|
if (neoDevice->getDeviceInfo().debuggerActive) {
|
||||||
@@ -696,4 +719,17 @@ NEO::GraphicsAllocation *DeviceImp::allocateMemoryFromHostPtr(const void *buffer
|
|||||||
return allocation;
|
return allocation;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename DescriptionType, typename ExpectedFlagType>
|
||||||
|
ze_result_t DeviceImp::isCreatedCommandListCopyOnly(const DescriptionType *desc, bool *useBliter, ExpectedFlagType flag) {
|
||||||
|
if (desc->flags & flag) {
|
||||||
|
auto hwInfo = neoDevice->getHardwareInfo();
|
||||||
|
if (hwInfo.capabilityTable.blitterOperationsSupported) {
|
||||||
|
*useBliter = true;
|
||||||
|
return ZE_RESULT_SUCCESS;
|
||||||
|
}
|
||||||
|
return ZE_RESULT_ERROR_INVALID_ENUMERATION;
|
||||||
|
}
|
||||||
|
return ZE_RESULT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
@@ -93,6 +93,8 @@ struct DeviceImp : public Device {
|
|||||||
CommandList *pageFaultCommandList = nullptr;
|
CommandList *pageFaultCommandList = nullptr;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
template <typename DescriptionType, typename ExpectedFlagType>
|
||||||
|
ze_result_t isCreatedCommandListCopyOnly(const DescriptionType *desc, bool *useBliter, ExpectedFlagType flag);
|
||||||
NEO::GraphicsAllocation *debugSurface = nullptr;
|
NEO::GraphicsAllocation *debugSurface = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ struct Event : _ze_event_handle_t {
|
|||||||
|
|
||||||
inline ze_event_handle_t toHandle() { return this; }
|
inline ze_event_handle_t toHandle() { return this; }
|
||||||
|
|
||||||
NEO::GraphicsAllocation &getAllocation();
|
virtual NEO::GraphicsAllocation &getAllocation();
|
||||||
|
|
||||||
uint64_t getGpuAddress() { return gpuAddress; }
|
uint64_t getGpuAddress() { return gpuAddress; }
|
||||||
uint64_t getOffsetOfEventTimestampRegister(uint32_t eventTimestampReg);
|
uint64_t getOffsetOfEventTimestampRegister(uint32_t eventTimestampReg);
|
||||||
@@ -97,7 +97,7 @@ struct EventPool : _ze_event_pool_handle_t {
|
|||||||
|
|
||||||
inline ze_event_pool_handle_t toHandle() { return this; }
|
inline ze_event_pool_handle_t toHandle() { return this; }
|
||||||
|
|
||||||
NEO::GraphicsAllocation &getAllocation() { return *eventPoolAllocation; }
|
virtual NEO::GraphicsAllocation &getAllocation() { return *eventPoolAllocation; }
|
||||||
|
|
||||||
virtual uint32_t getEventSize() = 0;
|
virtual uint32_t getEventSize() = 0;
|
||||||
virtual uint32_t getNumEventTimestampsToRead() = 0;
|
virtual uint32_t getNumEventTimestampsToRead() = 0;
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ using IsSKLOrKBL = IsWithinProducts<IGFX_SKYLAKE, IGFX_KABYLAKE>;
|
|||||||
HWTEST2_F(CommandListAppendLaunchKernel, givenKernelWithSLMThenL3IsProgrammedWithSLMValue, IsSKLOrKBL) {
|
HWTEST2_F(CommandListAppendLaunchKernel, givenKernelWithSLMThenL3IsProgrammedWithSLMValue, IsSKLOrKBL) {
|
||||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||||
createKernel();
|
createKernel();
|
||||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device));
|
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
|
||||||
ze_group_count_t groupCount{1, 1, 1};
|
ze_group_count_t groupCount{1, 1, 1};
|
||||||
|
|
||||||
EXPECT_LE(0u, kernel->kernelImmData->getDescriptor().kernelAttributes.slmInlineSize);
|
EXPECT_LE(0u, kernel->kernelImmData->getDescriptor().kernelAttributes.slmInlineSize);
|
||||||
|
|||||||
@@ -20,6 +20,8 @@ set(L0_MOCKS_SOURCES
|
|||||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_driver.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/mock_driver.cpp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_driver_handle.h
|
${CMAKE_CURRENT_SOURCE_DIR}/mock_driver_handle.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_driver_handle.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/mock_driver_handle.cpp
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/mock_event.cpp
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/mock_event.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.h
|
${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.h
|
${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.cpp
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
|||||||
using BaseClass = ::L0::CommandListCoreFamily<gfxCoreFamily>;
|
using BaseClass = ::L0::CommandListCoreFamily<gfxCoreFamily>;
|
||||||
using BaseClass::appendLaunchKernelWithParams;
|
using BaseClass::appendLaunchKernelWithParams;
|
||||||
using BaseClass::commandListPreemptionMode;
|
using BaseClass::commandListPreemptionMode;
|
||||||
|
using BaseClass::getAlignedAllocation;
|
||||||
|
|
||||||
WhiteBox() : ::L0::CommandListCoreFamily<gfxCoreFamily>(BaseClass::defaultNumIddsPerBlock) {}
|
WhiteBox() : ::L0::CommandListCoreFamily<gfxCoreFamily>(BaseClass::defaultNumIddsPerBlock) {}
|
||||||
virtual ~WhiteBox() {}
|
virtual ~WhiteBox() {}
|
||||||
@@ -149,7 +150,7 @@ struct Mock<CommandList> : public CommandList {
|
|||||||
MOCK_METHOD0(appendMIBBEnd, ze_result_t());
|
MOCK_METHOD0(appendMIBBEnd, ze_result_t());
|
||||||
MOCK_METHOD0(appendMINoop, ze_result_t());
|
MOCK_METHOD0(appendMINoop, ze_result_t());
|
||||||
MOCK_METHOD1(executeCommandListImmediate, ze_result_t(bool perforMigration));
|
MOCK_METHOD1(executeCommandListImmediate, ze_result_t(bool perforMigration));
|
||||||
MOCK_METHOD1(initialize, bool(L0::Device *device));
|
MOCK_METHOD2(initialize, bool(L0::Device *device, bool onlyCopyBlit));
|
||||||
|
|
||||||
uint8_t *batchBuffer = nullptr;
|
uint8_t *batchBuffer = nullptr;
|
||||||
NEO::GraphicsAllocation *mockAllocation = nullptr;
|
NEO::GraphicsAllocation *mockAllocation = nullptr;
|
||||||
|
|||||||
31
level_zero/core/test/unit_tests/mocks/mock_event.cpp
Normal file
31
level_zero/core/test/unit_tests/mocks/mock_event.cpp
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2020 Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "mock_event.h"
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace L0 {
|
||||||
|
namespace ult {
|
||||||
|
|
||||||
|
// Constructs the mock event. The fake graphics allocation is described over the
// in-object `memory` word (used both as CPU pointer and as GPU address), and
// the inherited `allocation` pointer is then aimed at that fake allocation.
Mock<Event>::Mock()
    : mockAllocation(0,
                     NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                     &memory,
                     reinterpret_cast<uint64_t>(&memory),
                     0,
                     sizeof(memory),
                     MemoryPool::System4KBPages) {
    allocation = &mockAllocation;
}

// Out-of-line destructor; the body has no work of its own to do.
Mock<Event>::~Mock() {}
|
||||||
|
|
||||||
|
// Constructs the mock event pool with a single pool slot and installs a default
// expectation so that getPoolSize() returns 1 on every call.
//
// `pool(1)` already creates exactly one value-initialized (zero) element, so no
// further assignment to the vector is required here.
Mock<EventPool>::Mock() : pool(1) {
    EXPECT_CALL(*this, getPoolSize()).WillRepeatedly(testing::Return(1));
}

// The vector member releases its storage in its own destructor; no explicit
// clear() is needed.
Mock<EventPool>::~Mock() = default;
|
||||||
|
|
||||||
|
} // namespace ult
|
||||||
|
} // namespace L0
|
||||||
86
level_zero/core/test/unit_tests/mocks/mock_event.h
Normal file
86
level_zero/core/test/unit_tests/mocks/mock_event.h
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2020 Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#include "level_zero/core/source/device/device.h"
|
||||||
|
#include "level_zero/core/source/event/event.h"
|
||||||
|
#include "level_zero/core/test/unit_tests/mock.h"
|
||||||
|
#include "level_zero/core/test/unit_tests/white_box.h"
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#if defined(__clang__)
|
||||||
|
#pragma clang diagnostic push
|
||||||
|
#pragma clang diagnostic ignored "-Winconsistent-missing-override"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace L0 {
|
||||||
|
namespace ult {
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct WhiteBox<::L0::Event> : public ::L0::Event {
|
||||||
|
using BaseClass = ::L0::Event;
|
||||||
|
using BaseClass::allocation;
|
||||||
|
};
|
||||||
|
|
||||||
|
using Event = WhiteBox<::L0::Event>;
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct WhiteBox<::L0::EventPool> : public ::L0::EventPool {
|
||||||
|
using BaseClass = ::L0::EventPool;
|
||||||
|
};
|
||||||
|
|
||||||
|
using EventPool = WhiteBox<::L0::EventPool>;
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct Mock<Event> : public Event {
|
||||||
|
Mock();
|
||||||
|
~Mock() override;
|
||||||
|
|
||||||
|
MOCK_METHOD3(create, L0::Event *(::L0::EventPool *eventPool, const ze_event_desc_t *desc, ::L0::Device *device));
|
||||||
|
MOCK_METHOD0(destroy, ze_result_t());
|
||||||
|
MOCK_METHOD0(hostSignal, ze_result_t());
|
||||||
|
MOCK_METHOD1(hostSynchronize, ze_result_t(uint32_t timeout));
|
||||||
|
MOCK_METHOD0(queryStatus, ze_result_t());
|
||||||
|
MOCK_METHOD0(reset, ze_result_t());
|
||||||
|
MOCK_METHOD2(getTimestamp, ze_result_t(ze_event_timestamp_type_t timestampType, void *dstptr));
|
||||||
|
|
||||||
|
// Fake an allocation for event memory
|
||||||
|
alignas(16) uint32_t memory = -1;
|
||||||
|
NEO::GraphicsAllocation mockAllocation;
|
||||||
|
|
||||||
|
using Event::allocation;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct Mock<EventPool> : public EventPool {
|
||||||
|
Mock();
|
||||||
|
~Mock() override;
|
||||||
|
|
||||||
|
MOCK_METHOD0(destroy, ze_result_t());
|
||||||
|
MOCK_METHOD0(getPoolSize, size_t());
|
||||||
|
MOCK_METHOD0(getPoolUsedCount, uint32_t());
|
||||||
|
MOCK_METHOD1(getIpcHandle, ze_result_t(ze_ipc_event_pool_handle_t *pIpcHandle));
|
||||||
|
MOCK_METHOD0(closeIpcHandle, ze_result_t());
|
||||||
|
MOCK_METHOD2(createEvent, ze_result_t(const ze_event_desc_t *desc, ze_event_handle_t *phEvent));
|
||||||
|
MOCK_METHOD2(reserveEventFromPool, ze_result_t(int index, ::L0::Event *event));
|
||||||
|
MOCK_METHOD1(releaseEventToPool, ze_result_t(::L0::Event *event));
|
||||||
|
MOCK_METHOD0(getDevice, Device *());
|
||||||
|
MOCK_METHOD0(getEventSize, uint32_t());
|
||||||
|
MOCK_METHOD0(getNumEventTimestampsToRead, uint32_t());
|
||||||
|
|
||||||
|
std::vector<int> pool;
|
||||||
|
|
||||||
|
using EventPool::eventPoolAllocation;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace ult
|
||||||
|
} // namespace L0
|
||||||
|
|
||||||
|
#if defined(__clang__)
|
||||||
|
#pragma clang diagnostic pop
|
||||||
|
#endif
|
||||||
@@ -6,11 +6,16 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||||
|
#include "shared/source/helpers/hw_info.h"
|
||||||
#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h"
|
#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h"
|
||||||
|
|
||||||
|
#include "opencl/test/unit_test/mocks/mock_graphics_allocation.h"
|
||||||
#include "test.h"
|
#include "test.h"
|
||||||
|
|
||||||
|
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
|
||||||
|
#include "level_zero/core/source/driver/driver_handle_imp.h"
|
||||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||||
|
#include "level_zero/core/test/unit_tests/mocks/mock_event.h"
|
||||||
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
|
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
|
||||||
|
|
||||||
namespace L0 {
|
namespace L0 {
|
||||||
@@ -32,7 +37,7 @@ TEST(zeCommandListCreateImmediate, redirectsToObject) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(CommandListCreate, whenCommandListIsCreatedThenItIsInitialized) {
|
TEST_F(CommandListCreate, whenCommandListIsCreatedThenItIsInitialized) {
|
||||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device));
|
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
|
||||||
ASSERT_NE(nullptr, commandList);
|
ASSERT_NE(nullptr, commandList);
|
||||||
|
|
||||||
EXPECT_EQ(device, commandList->device);
|
EXPECT_EQ(device, commandList->device);
|
||||||
@@ -57,7 +62,7 @@ TEST_F(CommandListCreate, whenCommandListIsCreatedThenItIsInitialized) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(CommandListCreate, givenRegularCommandListThenDefaultNumIddPerBlockIsUsed) {
|
TEST_F(CommandListCreate, givenRegularCommandListThenDefaultNumIddPerBlockIsUsed) {
|
||||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device));
|
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
|
||||||
ASSERT_NE(nullptr, commandList);
|
ASSERT_NE(nullptr, commandList);
|
||||||
|
|
||||||
const uint32_t defaultNumIdds = CommandList::defaultNumIddsPerBlock;
|
const uint32_t defaultNumIdds = CommandList::defaultNumIddsPerBlock;
|
||||||
@@ -71,7 +76,7 @@ TEST_F(CommandListCreate, givenImmediateCommandListThenCustomNumIddPerBlockUsed)
|
|||||||
ZE_COMMAND_QUEUE_MODE_DEFAULT,
|
ZE_COMMAND_QUEUE_MODE_DEFAULT,
|
||||||
ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
|
ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
|
||||||
0};
|
0};
|
||||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false));
|
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, false));
|
||||||
ASSERT_NE(nullptr, commandList);
|
ASSERT_NE(nullptr, commandList);
|
||||||
|
|
||||||
const uint32_t cmdListImmediateIdds = CommandList::commandListimmediateIddsPerBlock;
|
const uint32_t cmdListImmediateIdds = CommandList::commandListimmediateIddsPerBlock;
|
||||||
@@ -85,7 +90,7 @@ TEST_F(CommandListCreate, whenCreatingImmediateCommandListThenItHasImmediateComm
|
|||||||
ZE_COMMAND_QUEUE_MODE_DEFAULT,
|
ZE_COMMAND_QUEUE_MODE_DEFAULT,
|
||||||
ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
|
ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
|
||||||
0};
|
0};
|
||||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false));
|
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, false));
|
||||||
ASSERT_NE(nullptr, commandList);
|
ASSERT_NE(nullptr, commandList);
|
||||||
|
|
||||||
EXPECT_EQ(device, commandList->device);
|
EXPECT_EQ(device, commandList->device);
|
||||||
@@ -94,14 +99,14 @@ TEST_F(CommandListCreate, whenCreatingImmediateCommandListThenItHasImmediateComm
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(CommandListCreate, givenInvalidProductFamilyThenReturnsNullPointer) {
|
TEST_F(CommandListCreate, givenInvalidProductFamilyThenReturnsNullPointer) {
|
||||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(IGFX_UNKNOWN, device));
|
std::unique_ptr<L0::CommandList> commandList(CommandList::create(IGFX_UNKNOWN, device, false));
|
||||||
EXPECT_EQ(nullptr, commandList);
|
EXPECT_EQ(nullptr, commandList);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_F(CommandListCreate, whenCommandListIsCreatedThenStateBaseAddressCmdIsAddedAndCorrectlyProgrammed) {
|
HWTEST_F(CommandListCreate, whenCommandListIsCreatedThenStateBaseAddressCmdIsAddedAndCorrectlyProgrammed) {
|
||||||
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
|
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
|
||||||
|
|
||||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device));
|
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
|
||||||
auto &commandContainer = commandList->commandContainer;
|
auto &commandContainer = commandList->commandContainer;
|
||||||
auto gmmHelper = commandContainer.getDevice()->getGmmHelper();
|
auto gmmHelper = commandContainer.getDevice()->getGmmHelper();
|
||||||
|
|
||||||
@@ -141,5 +146,265 @@ HWTEST_F(CommandListCreate, whenCommandListIsCreatedThenStateBaseAddressCmdIsAdd
|
|||||||
EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER), cmdSba->getStatelessDataPortAccessMemoryObjectControlState());
|
EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER), cmdSba->getStatelessDataPortAccessMemoryObjectControlState());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Creates a command list with isCopyOnly == true and checks that no
// STATE_BASE_ADDRESS command is found in its command stream.
HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenCreatedThenStateBaseAddressCmdIsNotProgrammed) {
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, true));
    // Fail the test cleanly instead of dereferencing a null list when creation fails.
    ASSERT_NE(nullptr, commandList);
    auto &commandContainer = commandList->commandContainer;

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
    auto itor = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());

    EXPECT_EQ(cmdList.end(), itor);
}
|
||||||
|
|
||||||
|
// Appends a barrier to a command list created with isCopyOnly == true and
// checks that an MI_FLUSH_DW command is present in the parsed command stream.
HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenSetBarrierThenMiFlushDWIsProgrammed) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, true));
    // Fail the test cleanly instead of dereferencing a null list when creation fails.
    ASSERT_NE(nullptr, commandList);
    auto &commandContainer = commandList->commandContainer;
    commandList->appendBarrier(nullptr, 0, nullptr);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
    auto itor = find<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());

    EXPECT_NE(cmdList.end(), itor);
}
|
||||||
|
|
||||||
|
// Appends a barrier to a command list created with isCopyOnly == false and
// checks that a PIPE_CONTROL command is present in the parsed command stream.
HWTEST_F(CommandListCreate, givenCommandListWhenSetBarrierThenPipeControlIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
    // Fail the test cleanly instead of dereferencing a null list when creation fails.
    ASSERT_NE(nullptr, commandList);
    auto &commandContainer = commandList->commandContainer;
    commandList->appendBarrier(nullptr, 0, nullptr);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
    auto itor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());

    EXPECT_NE(cmdList.end(), itor);
}
|
||||||
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
class MockCommandList : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
|
||||||
|
public:
|
||||||
|
MockCommandList() : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>(1) {}
|
||||||
|
|
||||||
|
AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override {
|
||||||
|
return {0, 0, nullptr, true};
|
||||||
|
}
|
||||||
|
ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr,
|
||||||
|
NEO::GraphicsAllocation *dstPtrAlloc,
|
||||||
|
uint64_t dstOffset,
|
||||||
|
void *srcPtr,
|
||||||
|
NEO::GraphicsAllocation *srcPtrAlloc,
|
||||||
|
uint64_t srcOffset,
|
||||||
|
uint32_t size,
|
||||||
|
uint32_t elementSize,
|
||||||
|
Builtin builtin) override {
|
||||||
|
appendMemoryCopyKernelWithGACalledTimes++;
|
||||||
|
return ZE_RESULT_SUCCESS;
|
||||||
|
}
|
||||||
|
virtual ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
|
||||||
|
uint64_t dstOffset,
|
||||||
|
NEO::GraphicsAllocation *srcPtrAlloc,
|
||||||
|
uint64_t srcOffset, uint32_t size) override {
|
||||||
|
appendMemoryCopyBlitCalledTimes++;
|
||||||
|
return ZE_RESULT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual ze_result_t appendMemoryCopyBlitRegion(const void *srcptr,
|
||||||
|
const void *dstptr,
|
||||||
|
ze_copy_region_t srcRegion,
|
||||||
|
ze_copy_region_t dstRegion, Vec3<size_t> copySize,
|
||||||
|
size_t srcRowPitch, size_t srcSlicePitch,
|
||||||
|
size_t dstRowPitch, size_t dstSlicePitch,
|
||||||
|
size_t srcSize, size_t dstSize) override {
|
||||||
|
appendMemoryCopyBlitRegionCalledTimes++;
|
||||||
|
return ZE_RESULT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual ze_result_t appendMemoryCopyKernel2d(const void *dstptr, const void *srcptr,
|
||||||
|
Builtin builtin, const ze_copy_region_t *dstRegion,
|
||||||
|
uint32_t dstPitch, size_t dstOffset,
|
||||||
|
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||||
|
size_t srcOffset, ze_event_handle_t hSignalEvent,
|
||||||
|
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
|
||||||
|
appendMemoryCopyKernel2dCalledTimes++;
|
||||||
|
return ZE_RESULT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual ze_result_t appendMemoryCopyKernel3d(const void *dstptr, const void *srcptr,
|
||||||
|
Builtin builtin, const ze_copy_region_t *dstRegion,
|
||||||
|
uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset,
|
||||||
|
const ze_copy_region_t *srcRegion, uint32_t srcPitch,
|
||||||
|
uint32_t srcSlicePitch, size_t srcOffset,
|
||||||
|
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||||
|
ze_event_handle_t *phWaitEvents) override {
|
||||||
|
appendMemoryCopyKernel3dalledTimes++;
|
||||||
|
return ZE_RESULT_SUCCESS;
|
||||||
|
}
|
||||||
|
uint32_t appendMemoryCopyKernelWithGACalledTimes = 0;
|
||||||
|
uint32_t appendMemoryCopyBlitCalledTimes = 0;
|
||||||
|
uint32_t appendMemoryCopyBlitRegionCalledTimes = 0;
|
||||||
|
uint32_t appendMemoryCopyKernel2dCalledTimes = 0;
|
||||||
|
uint32_t appendMemoryCopyKernel3dalledTimes = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using Platforms = IsAtLeastProduct<IGFX_SKYLAKE>;
|
||||||
|
|
||||||
|
// With the list initialized as non-copy-only (second argument false),
// appendMemoryCopy is expected to reach the kernel-based helper and never the
// blit helper, as observed through the mock's counters.
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyCalledThenAppendMemoryCopyWithappendMemoryCopyKernelWithGACalled, Platforms) {
    MockCommandList<gfxCoreFamily> mockList;
    mockList.initialize(device, false);

    // Arbitrary non-null addresses; the mock never dereferences them.
    auto *source = reinterpret_cast<void *>(0x1234);
    auto *destination = reinterpret_cast<void *>(0x2345);

    mockList.appendMemoryCopy(destination, source, 0x1001, nullptr, 0, nullptr);

    EXPECT_LT(0u, mockList.appendMemoryCopyKernelWithGACalledTimes);
    EXPECT_EQ(0u, mockList.appendMemoryCopyBlitCalledTimes);
}
|
||||||
|
|
||||||
|
// With the list initialized as copy-only (second argument true),
// appendMemoryCopy is expected to reach the blit helper and never the
// kernel-based helper, as observed through the mock's counters.
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyCalledThenAppendMemoryCopyWithappendMemoryCopyWithBliterCalled, Platforms) {
    MockCommandList<gfxCoreFamily> mockList;
    mockList.initialize(device, true);

    // Arbitrary non-null addresses; the mock never dereferences them.
    auto *source = reinterpret_cast<void *>(0x1234);
    auto *destination = reinterpret_cast<void *>(0x2345);

    mockList.appendMemoryCopy(destination, source, 0x1001, nullptr, 0, nullptr);

    EXPECT_EQ(0u, mockList.appendMemoryCopyKernelWithGACalledTimes);
    EXPECT_LT(0u, mockList.appendMemoryCopyBlitCalledTimes);
}
|
||||||
|
|
||||||
|
class MockDriverHandle : public L0::DriverHandleImp {
|
||||||
|
public:
|
||||||
|
bool findAllocationDataForRange(const void *buffer,
|
||||||
|
size_t size,
|
||||||
|
NEO::SvmAllocationData **allocData) override {
|
||||||
|
if (allocData) {
|
||||||
|
*allocData = &data;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
NEO::SvmAllocationData data = {};
|
||||||
|
};
|
||||||
|
|
||||||
|
// With the list initialized as copy-only (second argument true),
// appendMemoryCopyRegion is expected to reach the blit-region helper.
HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionCalledThenAppendMemoryCopyWithappendMemoryCopyWithBliterCalled, Platforms) {
    MockCommandList<gfxCoreFamily> mockList;
    mockList.initialize(device, true);

    // NOTE(review): the address of this local is stored in the fixture's
    // device; confirm nothing dereferences it after the test body returns.
    MockDriverHandle mockDriver;
    device->setDriverHandle(&mockDriver);

    // Arbitrary non-null addresses and value-initialized regions.
    auto *source = reinterpret_cast<void *>(0x1234);
    auto *destination = reinterpret_cast<void *>(0x2345);
    ze_copy_region_t destinationRegion = {};
    ze_copy_region_t sourceRegion = {};

    mockList.appendMemoryCopyRegion(destination, &destinationRegion, 0, 0, source, &sourceRegion, 0, 0, nullptr);

    EXPECT_LT(0u, mockList.appendMemoryCopyBlitRegionCalledTimes);
}
|
||||||
|
|
||||||
|
// With the list initialized as non-copy-only (second argument false) and both
// regions set to {4, 4, 4, 2, 2, 2}, appendMemoryCopyRegion is expected to
// reach the 3D kernel helper and never the blit-region helper.
// NOTE(review): the initializer order is presumably
// {originX, originY, originZ, width, height, depth} - confirm against ze_copy_region_t.
HWTEST2_F(CommandListCreate, givenCommandListAnd3DWhbufferenMemoryCopyRegionCalledThenCopyKernel3DCalled, Platforms) {
    MockCommandList<gfxCoreFamily> mockList;
    mockList.initialize(device, false);

    // NOTE(review): the address of this local is stored in the fixture's
    // device; confirm nothing dereferences it after the test body returns.
    MockDriverHandle mockDriver;
    device->setDriverHandle(&mockDriver);

    auto *source = reinterpret_cast<void *>(0x1234);
    auto *destination = reinterpret_cast<void *>(0x2345);
    ze_copy_region_t destinationRegion = {4, 4, 4, 2, 2, 2};
    ze_copy_region_t sourceRegion = {4, 4, 4, 2, 2, 2};

    mockList.appendMemoryCopyRegion(destination, &destinationRegion, 0, 0, source, &sourceRegion, 0, 0, nullptr);

    EXPECT_EQ(0u, mockList.appendMemoryCopyBlitRegionCalledTimes);
    EXPECT_LT(0u, mockList.appendMemoryCopyKernel3dalledTimes);
}
|
||||||
|
|
||||||
|
// With the list initialized as non-copy-only (second argument false) and both
// regions set to {4, 4, 0, 2, 2, 1}, appendMemoryCopyRegion is expected to
// reach the 2D kernel helper and never the blit-region helper.
// NOTE(review): the initializer order is presumably
// {originX, originY, originZ, width, height, depth} - confirm against ze_copy_region_t.
HWTEST2_F(CommandListCreate, givenCommandListAnd2DWhbufferenMemoryCopyRegionCalledThenCopyKernel2DCalled, Platforms) {
    MockCommandList<gfxCoreFamily> mockList;
    mockList.initialize(device, false);

    // NOTE(review): the address of this local is stored in the fixture's
    // device; confirm nothing dereferences it after the test body returns.
    MockDriverHandle mockDriver;
    device->setDriverHandle(&mockDriver);

    auto *source = reinterpret_cast<void *>(0x1234);
    auto *destination = reinterpret_cast<void *>(0x2345);
    ze_copy_region_t destinationRegion = {4, 4, 0, 2, 2, 1};
    ze_copy_region_t sourceRegion = {4, 4, 0, 2, 2, 1};

    mockList.appendMemoryCopyRegion(destination, &destinationRegion, 0, 0, source, &sourceRegion, 0, 0, nullptr);

    EXPECT_EQ(0u, mockList.appendMemoryCopyBlitRegionCalledTimes);
    EXPECT_LT(0u, mockList.appendMemoryCopyKernel2dCalledTimes);
}
|
||||||
|
|
||||||
|
// Event mock that owns a MockGraphicsAllocation and returns it from
// getAllocation(); gpuAddress is set from that allocation's GPU address.
class MockEvent : public Mock<Event> {
  public:
    MockEvent()
        : mockAllocation(new NEO::MockGraphicsAllocation(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                                         reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
                                                         MemoryPool::System4KBPages)) {
        gpuAddress = mockAllocation->getGpuAddress();
    }

    // Hands out the allocation created in the constructor.
    NEO::GraphicsAllocation &getAllocation() override {
        return *mockAllocation;
    }

    // Backing allocation, released when the mock is destroyed.
    std::unique_ptr<NEO::GraphicsAllocation> mockAllocation;
};
|
||||||
|
|
||||||
|
// Appends a signal-event operation to a command list created with
// isCopyOnly == true and checks that an MI_FLUSH_DW command is present in the
// parsed command stream.
HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendSignalEventThenMiFlushDWIsProgrammed) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, true));
    // Fail the test cleanly instead of dereferencing a null list when creation fails.
    ASSERT_NE(nullptr, commandList);
    auto &commandContainer = commandList->commandContainer;

    MockEvent event;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_NONE;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_NONE;
    commandList->appendSignalEvent(event.toHandle());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
    auto itor = find<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());

    EXPECT_NE(cmdList.end(), itor);
}
|
||||||
|
|
||||||
|
// Appends a signal-event operation to a command list created with
// isCopyOnly == false and checks that a PIPE_CONTROL command is present in the
// parsed command stream.
HWTEST_F(CommandListCreate, givenCommandListyWhenAppendSignalEventThePipeControlIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
    // Fail the test cleanly instead of dereferencing a null list when creation fails.
    ASSERT_NE(nullptr, commandList);
    auto &commandContainer = commandList->commandContainer;

    MockEvent event;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_NONE;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_NONE;
    commandList->appendSignalEvent(event.toHandle());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
    auto itor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());

    EXPECT_NE(cmdList.end(), itor);
}
|
||||||
|
// Appends a wait on one event whose waitScope is ZE_EVENT_SCOPE_FLAG_HOST to a
// command list created with isCopyOnly == true and checks that an MI_FLUSH_DW
// command is present in the parsed command stream.
HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendWaitEventsWithDcFlushThenMiFlushDWIsProgrammed) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, true));
    // Fail the test cleanly instead of dereferencing a null list when creation fails.
    ASSERT_NE(nullptr, commandList);
    auto &commandContainer = commandList->commandContainer;

    MockEvent event;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_NONE;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    auto eventHandle = event.toHandle();
    commandList->appendWaitOnEvents(1, &eventHandle);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
    auto itor = find<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());

    EXPECT_NE(cmdList.end(), itor);
}
|
||||||
|
|
||||||
|
// Appends a wait on one event whose waitScope is ZE_EVENT_SCOPE_FLAG_HOST to a
// command list created with isCopyOnly == false and checks that a PIPE_CONTROL
// command is present in the parsed command stream.
HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThePipeControlIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
    // Fail the test cleanly instead of dereferencing a null list when creation fails.
    ASSERT_NE(nullptr, commandList);
    auto &commandContainer = commandList->commandContainer;

    MockEvent event;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_NONE;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    auto eventHandle = event.toHandle();
    commandList->appendWaitOnEvents(1, &eventHandle);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
    auto itor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());

    EXPECT_NE(cmdList.end(), itor);
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenAp
|
|||||||
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
||||||
createKernel();
|
createKernel();
|
||||||
|
|
||||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device));
|
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
|
||||||
|
|
||||||
auto &commandContainer = commandList->commandContainer;
|
auto &commandContainer = commandList->commandContainer;
|
||||||
const auto stream = commandContainer.getCommandStream();
|
const auto stream = commandContainer.getCommandStream();
|
||||||
@@ -59,7 +59,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenFunctionWhenBind
|
|||||||
createKernel();
|
createKernel();
|
||||||
|
|
||||||
ze_group_count_t groupCount{1, 1, 1};
|
ze_group_count_t groupCount{1, 1, 1};
|
||||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device));
|
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
|
||||||
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
|
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
|
||||||
|
|
||||||
auto commandStream = commandList->commandContainer.getCommandStream();
|
auto commandStream = commandList->commandContainer.getCommandStream();
|
||||||
@@ -86,7 +86,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenFunctionWhenBind
|
|||||||
|
|
||||||
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToCommandListThenKernelIsStored) {
|
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToCommandListThenKernelIsStored) {
|
||||||
createKernel();
|
createKernel();
|
||||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device));
|
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
|
||||||
ze_group_count_t groupCount{1, 1, 1};
|
ze_group_count_t groupCount{1, 1, 1};
|
||||||
|
|
||||||
EXPECT_TRUE(kernel->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf);
|
EXPECT_TRUE(kernel->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf);
|
||||||
@@ -100,7 +100,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC
|
|||||||
|
|
||||||
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToCommandListMultipleTimesThenKernelIsStoredOnce) {
|
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToCommandListMultipleTimesThenKernelIsStoredOnce) {
|
||||||
createKernel();
|
createKernel();
|
||||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device));
|
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
|
||||||
ze_group_count_t groupCount{1, 1, 1};
|
ze_group_count_t groupCount{1, 1, 1};
|
||||||
|
|
||||||
EXPECT_TRUE(kernel->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf);
|
EXPECT_TRUE(kernel->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf);
|
||||||
@@ -118,7 +118,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC
|
|||||||
|
|
||||||
HWTEST_F(CommandListAppendLaunchKernel, WhenAppendingMultipleTimesThenSshIsNotDepletedButReallocated) {
|
HWTEST_F(CommandListAppendLaunchKernel, WhenAppendingMultipleTimesThenSshIsNotDepletedButReallocated) {
|
||||||
createKernel();
|
createKernel();
|
||||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device));
|
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
|
||||||
ze_group_count_t groupCount{1, 1, 1};
|
ze_group_count_t groupCount{1, 1, 1};
|
||||||
|
|
||||||
auto kernelSshSize = kernel->getSurfaceStateHeapDataSize();
|
auto kernelSshSize = kernel->getSurfaceStateHeapDataSize();
|
||||||
@@ -143,7 +143,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, WhenAppendingFunctionThenUsedCmdBufferS
|
|||||||
ze_group_count_t groupCount{1, 1, 1};
|
ze_group_count_t groupCount{1, 1, 1};
|
||||||
|
|
||||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||||
bool ret = commandList->initialize(device);
|
bool ret = commandList->initialize(device, false);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed();
|
auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed();
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
|
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
|
||||||
#include "shared/test/unit_test/helpers/default_hw_info.h"
|
#include "shared/test/unit_test/helpers/default_hw_info.h"
|
||||||
|
|
||||||
|
#include "opencl/test/unit_test/mocks/mock_graphics_allocation.h"
|
||||||
#include "test.h"
|
#include "test.h"
|
||||||
|
|
||||||
#include "level_zero/core/source/driver/driver_handle_imp.h"
|
#include "level_zero/core/source/driver/driver_handle_imp.h"
|
||||||
@@ -34,7 +35,8 @@ TEST_F(CommandQueueCreate, whenCreatingCommandQueueThenItIsInitialized) {
|
|||||||
L0::CommandQueue *commandQueue = CommandQueue::create(productFamily,
|
L0::CommandQueue *commandQueue = CommandQueue::create(productFamily,
|
||||||
device,
|
device,
|
||||||
csr.get(),
|
csr.get(),
|
||||||
&desc);
|
&desc,
|
||||||
|
false);
|
||||||
ASSERT_NE(nullptr, commandQueue);
|
ASSERT_NE(nullptr, commandQueue);
|
||||||
|
|
||||||
L0::CommandQueueImp *commandQueueImp = reinterpret_cast<L0::CommandQueueImp *>(commandQueue);
|
L0::CommandQueueImp *commandQueueImp = reinterpret_cast<L0::CommandQueueImp *>(commandQueue);
|
||||||
@@ -90,7 +92,7 @@ HWTEST2_F(CommandQueueProgramSBATest, whenCreatingCommandQueueThenItIsInitialize
|
|||||||
desc.version = ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT;
|
desc.version = ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT;
|
||||||
auto csr = std::unique_ptr<NEO::CommandStreamReceiver>(neoDevice->createCommandStreamReceiver());
|
auto csr = std::unique_ptr<NEO::CommandStreamReceiver>(neoDevice->createCommandStreamReceiver());
|
||||||
auto commandQueue = new MockCommandQueueHw<gfxCoreFamily>(device, csr.get(), &desc);
|
auto commandQueue = new MockCommandQueueHw<gfxCoreFamily>(device, csr.get(), &desc);
|
||||||
commandQueue->initialize();
|
commandQueue->initialize(false);
|
||||||
|
|
||||||
uint32_t alignedSize = 4096u;
|
uint32_t alignedSize = 4096u;
|
||||||
NEO::LinearStream child(commandQueue->commandStream->getSpace(alignedSize), alignedSize);
|
NEO::LinearStream child(commandQueue->commandStream->getSpace(alignedSize), alignedSize);
|
||||||
@@ -103,5 +105,56 @@ HWTEST2_F(CommandQueueProgramSBATest, whenCreatingCommandQueueThenItIsInitialize
|
|||||||
commandQueue->destroy();
|
commandQueue->destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Creates a queue with ZE_COMMAND_QUEUE_FLAG_COPY_ONLY and a trailing `true`
// argument (presumably the copy-only switch - confirm against
// CommandQueue::create), then executes a command list created with
// isCopyOnly == false and expects ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE.
TEST_F(CommandQueueCreate, givenCmdQueueWithBlitCopyWhenExecutingNonCopyBlitCommandListThenWrongCommandListStatusReturned) {
    const ze_command_queue_desc_t copyOnlyQueueDesc = {
        ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT,
        ZE_COMMAND_QUEUE_FLAG_COPY_ONLY,
        ZE_COMMAND_QUEUE_MODE_DEFAULT,
        ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
        0};

    // The receiver is owned by this test and outlives the queue destroyed below.
    auto receiver = std::unique_ptr<NEO::CommandStreamReceiver>(neoDevice->createCommandStreamReceiver());

    L0::CommandQueue *queue = CommandQueue::create(productFamily, device, receiver.get(), &copyOnlyQueueDesc, true);
    ASSERT_NE(nullptr, queue);

    std::unique_ptr<L0::CommandList> computeList(CommandList::create(productFamily, device, false));
    auto listHandle = computeList->toHandle();
    auto result = queue->executeCommandLists(1, &listHandle, nullptr, false);

    EXPECT_EQ(result, ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE);

    queue->destroy();
}
|
||||||
|
|
||||||
|
// Creates a queue with ZE_COMMAND_QUEUE_FLAG_COPY_ONLY and a trailing `true`
// argument (presumably the copy-only switch - confirm against
// CommandQueue::create) on the device's default engine receiver, then executes
// a command list created with isCopyOnly == true and expects ZE_RESULT_SUCCESS.
TEST_F(CommandQueueCreate, givenCmdQueueWithBlitCopyWhenExecutingCopyBlitCommandListThenSuccessReturned) {
    const ze_command_queue_desc_t copyOnlyQueueDesc = {
        ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT,
        ZE_COMMAND_QUEUE_FLAG_COPY_ONLY,
        ZE_COMMAND_QUEUE_MODE_DEFAULT,
        ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
        0};

    auto receiver = neoDevice->getDefaultEngine().commandStreamReceiver;

    L0::CommandQueue *queue = CommandQueue::create(productFamily, device, receiver, &copyOnlyQueueDesc, true);
    ASSERT_NE(nullptr, queue);

    std::unique_ptr<L0::CommandList> copyList(CommandList::create(productFamily, device, true));
    auto listHandle = copyList->toHandle();
    auto result = queue->executeCommandLists(1, &listHandle, nullptr, false);

    EXPECT_EQ(result, ZE_RESULT_SUCCESS);

    queue->destroy();
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
@@ -72,13 +72,13 @@ using CommandQueueDebugCommandsTest = Test<ActiveDebuggerFixture>;
|
|||||||
|
|
||||||
HWTEST_F(CommandQueueDebugCommandsTest, givenDebuggingEnabledWhenCommandListIsExecutedThenKernelDebugCommandsAreAdded) {
|
HWTEST_F(CommandQueueDebugCommandsTest, givenDebuggingEnabledWhenCommandListIsExecutedThenKernelDebugCommandsAreAdded) {
|
||||||
ze_command_queue_desc_t queueDesc = {};
|
ze_command_queue_desc_t queueDesc = {};
|
||||||
auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, deviceL0, device->getDefaultEngine().commandStreamReceiver, &queueDesc));
|
auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, deviceL0, device->getDefaultEngine().commandStreamReceiver, &queueDesc, false));
|
||||||
ASSERT_NE(nullptr, commandQueue->commandStream);
|
ASSERT_NE(nullptr, commandQueue->commandStream);
|
||||||
|
|
||||||
auto usedSpaceBefore = commandQueue->commandStream->getUsed();
|
auto usedSpaceBefore = commandQueue->commandStream->getUsed();
|
||||||
|
|
||||||
ze_command_list_handle_t commandLists[] = {
|
ze_command_list_handle_t commandLists[] = {
|
||||||
CommandList::create(productFamily, deviceL0)->toHandle()};
|
CommandList::create(productFamily, deviceL0, false)->toHandle()};
|
||||||
uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]);
|
uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]);
|
||||||
|
|
||||||
auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
|
auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
|
||||||
|
|||||||
@@ -224,7 +224,7 @@ struct EncodeBatchBufferStartOrEnd {
|
|||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
struct EncodeMiFlushDW {
|
struct EncodeMiFlushDW {
|
||||||
using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW;
|
using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW;
|
||||||
static void programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData);
|
static void programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData, bool timeStampOperation, bool commandWithPostSync);
|
||||||
static void programMiFlushDwWA(LinearStream &commandStream);
|
static void programMiFlushDwWA(LinearStream &commandStream);
|
||||||
static void appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd);
|
static void appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd);
|
||||||
static size_t getMiFlushDwCmdSizeForDataWrite();
|
static size_t getMiFlushDwCmdSizeForDataWrite();
|
||||||
|
|||||||
@@ -395,14 +395,17 @@ void EncodeSurfaceState<Family>::getSshAlignedPointer(uintptr_t &ptr, size_t &of
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void EncodeMiFlushDW<GfxFamily>::programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData) {
|
void EncodeMiFlushDW<GfxFamily>::programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData, bool timeStampOperation, bool commandWithPostSync) {
|
||||||
programMiFlushDwWA(commandStream);
|
programMiFlushDwWA(commandStream);
|
||||||
|
|
||||||
auto miFlushDwCmd = commandStream.getSpaceForCmd<MI_FLUSH_DW>();
|
auto miFlushDwCmd = commandStream.getSpaceForCmd<MI_FLUSH_DW>();
|
||||||
*miFlushDwCmd = GfxFamily::cmdInitMiFlushDw;
|
*miFlushDwCmd = GfxFamily::cmdInitMiFlushDw;
|
||||||
miFlushDwCmd->setPostSyncOperation(MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD);
|
if (commandWithPostSync) {
|
||||||
miFlushDwCmd->setDestinationAddress(immediateDataGpuAddress);
|
auto postSyncType = timeStampOperation ? MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_TIMESTAMP_REGISTER : MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD;
|
||||||
miFlushDwCmd->setImmediateData(immediateData);
|
miFlushDwCmd->setPostSyncOperation(postSyncType);
|
||||||
|
miFlushDwCmd->setDestinationAddress(immediateDataGpuAddress);
|
||||||
|
miFlushDwCmd->setImmediateData(immediateData);
|
||||||
|
}
|
||||||
appendMiFlushDw(miFlushDwCmd);
|
appendMiFlushDw(miFlushDwCmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -849,7 +849,7 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
|
|||||||
|
|
||||||
if (blitProperties.outputTimestampPacket) {
|
if (blitProperties.outputTimestampPacket) {
|
||||||
auto timestampPacketGpuAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
|
auto timestampPacketGpuAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
|
||||||
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, timestampPacketGpuAddress, 0);
|
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, timestampPacketGpuAddress, 0, true, true);
|
||||||
makeResident(*blitProperties.outputTimestampPacket->getBaseGraphicsAllocation());
|
makeResident(*blitProperties.outputTimestampPacket->getBaseGraphicsAllocation());
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -861,7 +861,7 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
|
|||||||
|
|
||||||
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo());
|
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo());
|
||||||
|
|
||||||
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), newTaskCount);
|
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), newTaskCount, false, true);
|
||||||
|
|
||||||
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo());
|
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo());
|
||||||
|
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ HWTEST_F(CommandEncoderTests, givenImmDataWriteWhenProgrammingMiFlushDwThenSetAl
|
|||||||
uint64_t gpuAddress = 0x1230000;
|
uint64_t gpuAddress = 0x1230000;
|
||||||
uint64_t immData = 456;
|
uint64_t immData = 456;
|
||||||
|
|
||||||
EncodeMiFlushDW<FamilyType>::programMiFlushDw(linearStream, gpuAddress, immData);
|
EncodeMiFlushDW<FamilyType>::programMiFlushDw(linearStream, gpuAddress, immData, false, true);
|
||||||
auto miFlushDwCmd = reinterpret_cast<MI_FLUSH_DW *>(buffer);
|
auto miFlushDwCmd = reinterpret_cast<MI_FLUSH_DW *>(buffer);
|
||||||
|
|
||||||
unsigned int sizeMultiplier = 1;
|
unsigned int sizeMultiplier = 1;
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ set(NEO_CORE_ENCODERS_TESTS
|
|||||||
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_dispatch_kernel.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_dispatch_kernel.cpp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_math.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_math.cpp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_media_interface_descriptor.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_media_interface_descriptor.cpp
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_mi_flush_dw.cpp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_semaphore.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_semaphore.cpp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_set_mmio.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_set_mmio.cpp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_states.cpp
|
${CMAKE_CURRENT_SOURCE_DIR}/test_encode_states.cpp
|
||||||
|
|||||||
124
shared/test/unit_test/encoders/test_encode_mi_flush_dw.cpp
Normal file
124
shared/test/unit_test/encoders/test_encode_mi_flush_dw.cpp
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2017-2020 Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "shared/source/command_container/command_encoder.h"
|
||||||
|
#include "shared/source/command_stream/linear_stream.h"
|
||||||
|
#include "shared/source/helpers/blit_commands_helper.h"
|
||||||
|
#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h"
|
||||||
|
|
||||||
|
#include "opencl/test/unit_test/mocks/mock_graphics_allocation.h"
|
||||||
|
#include "test.h"
|
||||||
|
|
||||||
|
using namespace NEO;
|
||||||
|
|
||||||
|
using EncodeMiFlushDWTest = testing::Test;
|
||||||
|
|
||||||
|
// Checks that programMiFlushDw() called with commandWithPostSync == false
// leaves the first MI_FLUSH_DW found in the stream with
// POST_SYNC_OPERATION_NO_WRITE.
HWTEST_F(EncodeMiFlushDWTest, GivenLinearStreamWhenCllaedEncodeWithNoPostSyncThenPostSyncNotWriteIsSet) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    // Stack buffer that backs the command stream under test.
    uint32_t cmdBuffer[1024];
    MockGraphicsAllocation gfxAllocation(static_cast<void *>(cmdBuffer), sizeof(cmdBuffer));
    LinearStream stream(&gfxAllocation);

    // Address and immediate data are irrelevant here: no post-sync write is requested.
    EncodeMiFlushDW<FamilyType>::programMiFlushDw(stream, 0, 0, false, false);

    GenCmdList parsedCmds;
    CmdParse<FamilyType>::parseCommandBuffer(parsedCmds, stream.getCpuBase(), stream.getUsed());

    auto flushIt = find<MI_FLUSH_DW *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(flushIt, parsedCmds.end());

    auto flushCmd = genCmdCast<MI_FLUSH_DW *>(*flushIt);
    EXPECT_EQ(flushCmd->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_NO_WRITE);
}
|
||||||
|
|
||||||
|
// Checks that programMiFlushDw() called with commandWithPostSync == true
// programs the requested destination address and immediate data into the
// MI_FLUSH_DW that carries the post-sync operation.
HWTEST_F(EncodeMiFlushDWTest, GivenLinearStreamWhenCllaedEncodeWithPostSyncDataThenPostSyncDataIsSet) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    // Stack buffer that backs the command stream under test.
    uint32_t cmdBuffer[1024];
    MockGraphicsAllocation gfxAllocation(static_cast<void *>(cmdBuffer), sizeof(cmdBuffer));
    LinearStream stream(&gfxAllocation);

    uint64_t address = 0x1000;
    uint64_t data = 0x4321;

    EncodeMiFlushDW<FamilyType>::programMiFlushDw(stream, address, data, false, true);

    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, stream.getCpuBase(), stream.getUsed());

    auto itor = find<MI_FLUSH_DW *>(commands.begin(), commands.end());
    ASSERT_NE(itor, commands.end());

    // The first MI_FLUSH_DW is not necessarily the one with the post-sync
    // write (presumably a workaround flush may precede it - confirm against
    // programMiFlushDwWA), so scan forward for the first command with one.
    bool miFlushWithPostSyncFound = false;
    for (; itor != commands.end(); ++itor) {
        auto cmd = genCmdCast<MI_FLUSH_DW *>(*itor);
        // The scan visits every remaining parsed command, so guard against
        // entries for which the cast yields no MI_FLUSH_DW.
        if (cmd != nullptr && cmd->getPostSyncOperation() != MI_FLUSH_DW::POST_SYNC_OPERATION_NO_WRITE) {
            EXPECT_EQ(cmd->getDestinationAddress(), address);
            EXPECT_EQ(cmd->getImmediateData(), data);
            miFlushWithPostSyncFound = true;
            break;
        }
    }
    EXPECT_TRUE(miFlushWithPostSyncFound);
}
|
||||||
|
|
||||||
|
// Checks that programMiFlushDw() called with timeStampOperation == false and
// commandWithPostSync == true selects
// POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD.
HWTEST_F(EncodeMiFlushDWTest, GivenLinearStreamWhenCllaedEncodeWithTimestampFaslseThenPostSyncDataTypeIsSet) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    // Stack buffer that backs the command stream under test.
    uint32_t cmdBuffer[1024];
    MockGraphicsAllocation gfxAllocation(static_cast<void *>(cmdBuffer), sizeof(cmdBuffer));
    LinearStream stream(&gfxAllocation);

    uint64_t address = 0x1000;
    uint64_t data = 0x4321;

    EncodeMiFlushDW<FamilyType>::programMiFlushDw(stream, address, data, false, true);

    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, stream.getCpuBase(), stream.getUsed());

    auto itor = find<MI_FLUSH_DW *>(commands.begin(), commands.end());
    ASSERT_NE(itor, commands.end());

    // Scan forward for the first MI_FLUSH_DW that performs a post-sync write;
    // earlier flushes without one are skipped.
    bool miFlushWithPostSyncFound = false;
    for (; itor != commands.end(); ++itor) {
        auto cmd = genCmdCast<MI_FLUSH_DW *>(*itor);
        // The scan visits every remaining parsed command, so guard against
        // entries for which the cast yields no MI_FLUSH_DW.
        if (cmd != nullptr && cmd->getPostSyncOperation() != MI_FLUSH_DW::POST_SYNC_OPERATION_NO_WRITE) {
            EXPECT_EQ(cmd->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD);
            miFlushWithPostSyncFound = true;
            break;
        }
    }
    EXPECT_TRUE(miFlushWithPostSyncFound);
}
|
||||||
|
|
||||||
|
// Checks that programMiFlushDw() called with timeStampOperation == true and
// commandWithPostSync == true selects
// POST_SYNC_OPERATION_WRITE_TIMESTAMP_REGISTER.
HWTEST_F(EncodeMiFlushDWTest, GivenLinearStreamWhenCllaedEncodeWithTimestampTrueThenPostSyncDataTypeIsSet) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    // Stack buffer that backs the command stream under test.
    uint32_t cmdBuffer[1024];
    MockGraphicsAllocation gfxAllocation(static_cast<void *>(cmdBuffer), sizeof(cmdBuffer));
    LinearStream stream(&gfxAllocation);

    uint64_t address = 0x1000;
    uint64_t data = 0x4321;

    EncodeMiFlushDW<FamilyType>::programMiFlushDw(stream, address, data, true, true);

    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, stream.getCpuBase(), stream.getUsed());

    auto itor = find<MI_FLUSH_DW *>(commands.begin(), commands.end());
    ASSERT_NE(itor, commands.end());

    // Scan forward for the first MI_FLUSH_DW that performs a post-sync write;
    // earlier flushes without one are skipped.
    bool miFlushWithPostSyncFound = false;
    for (; itor != commands.end(); ++itor) {
        auto cmd = genCmdCast<MI_FLUSH_DW *>(*itor);
        // The scan visits every remaining parsed command, so guard against
        // entries for which the cast yields no MI_FLUSH_DW.
        if (cmd != nullptr && cmd->getPostSyncOperation() != MI_FLUSH_DW::POST_SYNC_OPERATION_NO_WRITE) {
            EXPECT_EQ(cmd->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_TIMESTAMP_REGISTER);
            miFlushWithPostSyncFound = true;
            break;
        }
    }
    EXPECT_TRUE(miFlushWithPostSyncFound);
}
|
||||||
@@ -5,6 +5,7 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
set(NEO_CORE_HELPERS_TESTS
|
set(NEO_CORE_HELPERS_TESTS
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper_tests.cpp
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/debug_manager_state_restore.h
|
${CMAKE_CURRENT_SOURCE_DIR}/debug_manager_state_restore.h
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/default_hw_info.h
|
${CMAKE_CURRENT_SOURCE_DIR}/default_hw_info.h
|
||||||
|
|||||||
73
shared/test/unit_test/helpers/blit_commands_helper_tests.cpp
Normal file
73
shared/test/unit_test/helpers/blit_commands_helper_tests.cpp
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2017-2020 Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "shared/source/helpers/blit_commands_helper.h"
|
||||||
|
|
||||||
|
#include "opencl/test/unit_test/mocks/mock_graphics_allocation.h"
|
||||||
|
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
|
||||||
|
using namespace NEO;
|
||||||
|
|
||||||
|
// Checks that constructPropertiesForCopyBuffer() copies every argument
// (allocations, GPU addresses, offsets, size and pitches) into the returned
// BlitProperties and marks the blit as BufferToBuffer.
TEST(BlitCommandsHelperTest, GivenBufferParamsWhenConstructingPropertiesForBufferRegionsThenPropertiesCreatedCorrectly) {
    uint32_t srcData[] = {1, 2, 3, 4};
    uint32_t dstData[] = {4, 3, 2, 1};
    uint64_t srcGpuAddr = 0x12345;
    uint64_t dstGpuAddr = 0x54321;

    auto srcAlloc = std::make_unique<MockGraphicsAllocation>(srcData, srcGpuAddr, sizeof(srcData));
    auto dstAlloc = std::make_unique<MockGraphicsAllocation>(dstData, dstGpuAddr, sizeof(dstData));

    Vec3<size_t> srcOffsets{1, 2, 3};
    Vec3<size_t> dstOffsets{3, 2, 1};
    Vec3<size_t> copySize{2, 2, 2};

    size_t srcRowPitch = 2;
    size_t srcSlicePitch = 3;
    size_t dstRowPitch = 2;
    size_t dstSlicePitch = 3;

    auto props = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstAlloc.get(), srcAlloc.get(),
                                                                       dstOffsets, srcOffsets, copySize,
                                                                       srcRowPitch, srcSlicePitch,
                                                                       dstRowPitch, dstSlicePitch);

    // Direction and the allocations/addresses it was built from.
    EXPECT_EQ(props.blitDirection, BlitterConstants::BlitDirection::BufferToBuffer);
    EXPECT_EQ(props.dstAllocation, dstAlloc.get());
    EXPECT_EQ(props.srcAllocation, srcAlloc.get());
    EXPECT_EQ(props.dstGpuAddress, dstGpuAddr);
    EXPECT_EQ(props.srcGpuAddress, srcGpuAddr);

    // Region geometry.
    EXPECT_EQ(props.copySize, copySize);
    EXPECT_EQ(props.dstOffset, dstOffsets);
    EXPECT_EQ(props.srcOffset, srcOffsets);

    // Pitches.
    EXPECT_EQ(props.dstRowPitch, dstRowPitch);
    EXPECT_EQ(props.dstSlicePitch, dstSlicePitch);
    EXPECT_EQ(props.srcRowPitch, srcRowPitch);
    EXPECT_EQ(props.srcSlicePitch, srcSlicePitch);
}
|
||||||
|
|
||||||
|
// Checks that constructPropertiesForCopyBuffer() reports a copy size of
// {x, 1, 1} when the requested Y and Z copy sizes are both 0.
TEST(BlitCommandsHelperTest, GivenCopySizeYAndZEqual0WhenConstructingPropertiesForBufferRegionsThenCopyZAndZEqual1) {
    uint32_t srcData[] = {1, 2, 3, 4};
    uint32_t dstData[] = {4, 3, 2, 1};
    uint64_t srcGpuAddr = 0x12345;
    uint64_t dstGpuAddr = 0x54321;

    auto srcAlloc = std::make_unique<MockGraphicsAllocation>(srcData, srcGpuAddr, sizeof(srcData));
    auto dstAlloc = std::make_unique<MockGraphicsAllocation>(dstData, dstGpuAddr, sizeof(dstData));

    Vec3<size_t> srcOffsets{1, 2, 3};
    Vec3<size_t> dstOffsets{3, 2, 1};
    // Only the X extent is non-zero.
    Vec3<size_t> copySize{2, 0, 0};

    size_t srcRowPitch = 2;
    size_t srcSlicePitch = 3;
    size_t dstRowPitch = 2;
    size_t dstSlicePitch = 3;

    auto props = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstAlloc.get(), srcAlloc.get(),
                                                                       dstOffsets, srcOffsets, copySize,
                                                                       srcRowPitch, srcSlicePitch,
                                                                       dstRowPitch, dstSlicePitch);

    Vec3<size_t> expectedSize{copySize.x, 1, 1};
    EXPECT_EQ(props.copySize, expectedSize);
}
|
||||||
Reference in New Issue
Block a user