diff --git a/level_zero/core/source/cmdlist/cmdlist.cpp b/level_zero/core/source/cmdlist/cmdlist.cpp index da357ca764..c71df11f04 100644 --- a/level_zero/core/source/cmdlist/cmdlist.cpp +++ b/level_zero/core/source/cmdlist/cmdlist.cpp @@ -77,9 +77,12 @@ void CommandList::eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocati } } +bool CommandList::isCopyOnly() const { + return isCopyOnlyCmdList; +} + NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *kernel) { auto functionAttributes = kernel->getImmutableData()->getDescriptor().kernelAttributes; - NEO::PreemptionFlags flags = {}; flags.flags.disabledMidThreadPreemptionKernel = functionAttributes.flags.requiresDisabledMidThreadPreemption; flags.flags.usesFencesForReadWriteImages = functionAttributes.flags.usesFencesForReadWriteImages; diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index ee148f48e2..6cb1221af2 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -117,10 +117,10 @@ struct CommandList : _ze_command_list_handle_t { virtual ze_result_t appendMIBBEnd() = 0; virtual ze_result_t appendMINoop() = 0; - static CommandList *create(uint32_t productFamily, Device *device); + static CommandList *create(uint32_t productFamily, Device *device, bool isCopyOnly); static CommandList *createImmediate(uint32_t productFamily, Device *device, const ze_command_queue_desc_t *desc, - bool internalUsage); + bool internalUsage, bool isCopyOnly); static CommandList *fromHandle(ze_command_list_handle_t handle) { return static_cast(handle); @@ -147,6 +147,7 @@ struct CommandList : _ze_command_list_handle_t { void removeHostPtrAllocations(); void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation); void eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation); + bool isCopyOnly() const; enum CommandListType : uint32_t { TYPE_REGULAR = 0u, @@ -161,7 +162,7 @@ struct CommandList : _ze_command_list_handle_t { std::vector printfFunctionContainer; virtual ze_result_t executeCommandListImmediate(bool performMigration) = 0; - virtual bool initialize(Device *device) = 0; + virtual bool initialize(Device *device, bool isCopyOnly) = 0; virtual ~CommandList(); NEO::CommandContainer commandContainer; @@ -169,6 +170,7 @@ struct CommandList : _ze_command_list_handle_t { std::map hostPtrMap; uint32_t commandListPerThreadScratchSize = 0u; NEO::PreemptionMode commandListPreemptionMode = NEO::PreemptionMode::Initial; + bool isCopyOnlyCmdList = false; }; using CommandListAllocatorFn = CommandList *(*)(uint32_t); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 2b13e47f16..0c5ddcdffe 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -35,7 +35,7 @@ struct CommandListCoreFamily : CommandListImp { using CommandListImp::CommandListImp; - bool initialize(Device *device) override; + bool initialize(Device *device, bool isCopyOnly) override; virtual void programL3(bool isSLMused); ze_result_t close() override; @@ -124,26 +124,39 @@ struct CommandListCoreFamily : CommandListImp { ze_result_t executeCommandListImmediate(bool performMigration) override; protected: - ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, - uint64_t dstOffset, void *srcPtr, - NEO::GraphicsAllocation *srcPtrAlloc, - uint64_t srcOffset, uint32_t size, - uint32_t elementSize, Builtin builtin); + MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, + uint64_t dstOffset, void *srcPtr, + NEO::GraphicsAllocation *srcPtrAlloc, + uint64_t srcOffset, uint32_t size, + uint32_t elementSize, Builtin builtin); - ze_result_t appendMemoryCopyKernel2d(const void *dstptr, const void *srcptr, - Builtin builtin, const ze_copy_region_t *dstRegion, - uint32_t dstPitch, size_t dstOffset, - const ze_copy_region_t *srcRegion, uint32_t srcPitch, - size_t srcOffset, ze_event_handle_t hSignalEvent, - uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); + MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc, + uint64_t dstOffset, + NEO::GraphicsAllocation *srcPtrAlloc, + uint64_t srcOffset, uint32_t size); - ze_result_t appendMemoryCopyKernel3d(const void *dstptr, const void *srcptr, - Builtin builtin, const ze_copy_region_t *dstRegion, - uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset, - const ze_copy_region_t *srcRegion, uint32_t srcPitch, - uint32_t srcSlicePitch, size_t srcOffset, - ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, - ze_event_handle_t *phWaitEvents); + MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlitRegion(const void *srcptr, + const void *dstptr, + ze_copy_region_t srcRegion, + ze_copy_region_t dstRegion, Vec3 copySize, + size_t srcRowPitch, size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, + size_t srcSize, size_t dstSize); + + MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel2d(const void *dstptr, const void *srcptr, + Builtin builtin, const ze_copy_region_t *dstRegion, + uint32_t dstPitch, size_t dstOffset, + const ze_copy_region_t *srcRegion, uint32_t srcPitch, + size_t srcOffset, ze_event_handle_t hSignalEvent, + uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); + + MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernel3d(const void *dstptr, const void *srcptr, + Builtin builtin, const ze_copy_region_t *dstRegion, + uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset, + const ze_copy_region_t *srcRegion, uint32_t srcPitch, + uint32_t srcSlicePitch, size_t srcOffset, + ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents); ze_result_t appendLaunchKernelWithParams(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, @@ -160,7 +173,7 @@ struct CommandListCoreFamily : CommandListImp { void appendSignalEventPostWalker(ze_event_handle_t hEvent); uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region); - AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize); + virtual AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize); ze_result_t addEventsToCmdList(ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents); }; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 96e357726c..d9c40e8e07 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -10,6 +10,7 @@ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/device/device.h" +#include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/heap_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" @@ -35,17 +36,19 @@ template struct EncodeStateBaseAddress; template -bool CommandListCoreFamily::initialize(Device *device) { +bool CommandListCoreFamily::initialize(Device *device, bool isCopyOnly) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; if (!commandContainer.initialize(static_cast(device)->neoDevice)) { return false; } - NEO::EncodeStateBaseAddress::encode(commandContainer); - commandContainer.setDirtyStateForAllHeaps(false); - + if (!isCopyOnly) { + NEO::EncodeStateBaseAddress::encode(commandContainer); + commandContainer.setDirtyStateForAllHeaps(false); + } this->device = device; this->commandListPreemptionMode = device->getDevicePreemptionMode(); + this->isCopyOnlyCmdList = isCopyOnly; return true; } @@ -182,7 +185,11 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), false); + if (isCopyOnlyCmdList) { + NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false); + } else { + NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), false); + } if (hSignalEvent) { this->appendSignalEventPostWalker(hSignalEvent); @@ -565,6 +572,48 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyKernelWithGA(v nullptr, 0, nullptr); } +template +ze_result_t CommandListCoreFamily::appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc, + uint64_t dstOffset, + NEO::GraphicsAllocation *srcPtrAlloc, + uint64_t srcOffset, + uint32_t size) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstPtrAlloc, srcPtrAlloc, {dstOffset, 0, 0}, {srcOffset, 0, 0}, {size, 0, 0}, 0, 0, 0, 0); + commandContainer.addToResidencyContainer(dstPtrAlloc); + commandContainer.addToResidencyContainer(srcPtrAlloc); + NEO::BlitCommandsHelper::dispatchBlitCommandsForBuffer(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); + return ZE_RESULT_SUCCESS; +} + +template +ze_result_t CommandListCoreFamily::appendMemoryCopyBlitRegion(const void *srcptr, + const void *dstptr, + ze_copy_region_t srcRegion, + ze_copy_region_t dstRegion, Vec3 copySize, + size_t srcRowPitch, size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, + size_t srcSize, size_t dstSize) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + + auto dstAllocationStruct = getAlignedAllocation(this->device, dstptr, dstSize); + auto srcAllocationStruct = getAlignedAllocation(this->device, srcptr, srcSize); + + auto srcPtrAlloc = srcAllocationStruct.alloc; + auto dstPtrAlloc = dstAllocationStruct.alloc; + + Vec3 srcPtrOffset = {srcRegion.originX, srcRegion.originY, srcRegion.originZ}; + Vec3 dstPtrOffset = {dstRegion.originX, dstRegion.originY, dstRegion.originZ}; + + auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstPtrAlloc, srcPtrAlloc, + dstPtrOffset, srcPtrOffset, copySize, srcRowPitch, srcSlicePitch, + dstRowPitch, dstSlicePitch); + commandContainer.addToResidencyContainer(dstPtrAlloc); + commandContainer.addToResidencyContainer(srcPtrAlloc); + NEO::BlitCommandsHelper::dispatchBlitCommandsForBuffer(blitProperties, *commandContainer.getCommandStream(), *device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]); + return ZE_RESULT_SUCCESS; +} + template ze_result_t CommandListCoreFamily::appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr, @@ -644,31 +693,37 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, appendEventForProfiling(hSignalEvent, true); if (ret == ZE_RESULT_SUCCESS && leftSize) { - ret = appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), - dstAllocationStruct.alloc, dstAllocationStruct.offset, - reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), - srcAllocationStruct.alloc, srcAllocationStruct.offset, - static_cast(leftSize), 1, - Builtin::CopyBufferToBufferSide); + ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, dstAllocationStruct.offset, + srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast(leftSize)) + : appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), + dstAllocationStruct.alloc, dstAllocationStruct.offset, + reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), + srcAllocationStruct.alloc, srcAllocationStruct.offset, + static_cast(leftSize), 1, + Builtin::CopyBufferToBufferSide); } if (ret == ZE_RESULT_SUCCESS && middleSizeBytes) { - ret = appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), - dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset, - reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), - srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, - static_cast(middleSizeBytes), - static_cast(middleElSize), - Builtin::CopyBufferToBufferMiddle); + ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset, + srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, static_cast(middleSizeBytes)) + : appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), + dstAllocationStruct.alloc, leftSize + dstAllocationStruct.offset, + reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), + srcAllocationStruct.alloc, leftSize + srcAllocationStruct.offset, + static_cast(middleSizeBytes), + static_cast(middleElSize), + Builtin::CopyBufferToBufferMiddle); } if (ret == ZE_RESULT_SUCCESS && rightSize) { - ret = appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), - dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset, - reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), - srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, - static_cast(rightSize), 1u, - Builtin::CopyBufferToBufferSide); + ret = isCopyOnlyCmdList ? appendMemoryCopyBlit(dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset, + srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, static_cast(rightSize)) + : appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), + dstAllocationStruct.alloc, leftSize + middleSizeBytes + dstAllocationStruct.offset, + reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), + srcAllocationStruct.alloc, leftSize + middleSizeBytes + srcAllocationStruct.offset, + static_cast(rightSize), 1u, + Builtin::CopyBufferToBufferSide); } if (hSignalEvent) { @@ -743,13 +798,15 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d ze_result_t result = ZE_RESULT_SUCCESS; if (srcRegion->depth > 1) { - result = this->appendMemoryCopyKernel3d(alignedDstPtr, alignedSrcPtr, - Builtin::CopyBufferRectBytes3d, dstRegion, dstPitch, dstSlicePitch, dstOffset, - srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, 0, nullptr); + result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(alignedSrcPtr, alignedDstPtr, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth}, srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize) + : this->appendMemoryCopyKernel3d(alignedDstPtr, alignedSrcPtr, + Builtin::CopyBufferRectBytes3d, dstRegion, dstPitch, dstSlicePitch, dstOffset, + srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, 0, nullptr); } else { - result = this->appendMemoryCopyKernel2d(alignedDstPtr, alignedSrcPtr, - Builtin::CopyBufferRectBytes2d, dstRegion, dstPitch, dstOffset, - srcRegion, srcPitch, srcOffset, hSignalEvent, 0, nullptr); + result = isCopyOnlyCmdList ? appendMemoryCopyBlitRegion(alignedSrcPtr, alignedDstPtr, *srcRegion, *dstRegion, {srcRegion->width, srcRegion->height, srcRegion->depth}, srcPitch, srcSlicePitch, dstPitch, dstSlicePitch, srcSize, dstSize) + : this->appendMemoryCopyKernel2d(alignedDstPtr, alignedSrcPtr, + Builtin::CopyBufferRectBytes2d, dstRegion, dstPitch, dstOffset, + srcRegion, srcPitch, srcOffset, hSignalEvent, 0, nullptr); } if (result) { @@ -757,7 +814,11 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d } if (hostPointerNeedsFlush) { - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); + if (isCopyOnlyCmdList) { + NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false); + } else { + NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); + } } return ZE_RESULT_SUCCESS; @@ -1091,12 +1152,14 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han auto event = Event::fromHandle(hEvent); commandContainer.addToResidencyContainer(&event->getAllocation()); - - bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; - NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( - *commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - event->getGpuAddress(), Event::STATE_SIGNALED, dcFlushEnable, commandContainer.getDevice()->getHardwareInfo()); - + if (isCopyOnlyCmdList) { + NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), event->getGpuAddress(), Event::STATE_SIGNALED, false, true); + } else { + bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; + NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( + *commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + event->getGpuAddress(), Event::STATE_SIGNALED, dcFlushEnable, commandContainer.getDevice()->getHardwareInfo()); + } return ZE_RESULT_SUCCESS; } @@ -1127,7 +1190,11 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu bool dcFlushEnable = (event->waitScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; if (dcFlushEnable) { - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); + if (isCopyOnlyCmdList) { + NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false); + } else { + NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); + } } } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index 9dc03fd58e..447978efb2 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -101,19 +101,22 @@ void CommandListCoreFamily::appendEventForProfiling(ze_event_hand timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_START); NEO::EncodeStoreMMIO::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress); } else { - timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END); NEO::EncodeStoreMMIO::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress); timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_END); bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; - NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( - *(commandContainer.getCommandStream()), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP, - timeStampAddress, - 0llu, - dcFlushEnable, - device->getHwInfo()); + if (isCopyOnlyCmdList) { + NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), timeStampAddress, 0llu, true, true); + } else { + NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( + *(commandContainer.getCommandStream()), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP, + timeStampAddress, + 0llu, + dcFlushEnable, + device->getHwInfo()); + } } } } // namespace L0 diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index ddc0c6701d..f605b381aa 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -10,6 +10,7 @@ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/device/device.h" +#include "shared/source/helpers/engine_node_helper.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "level_zero/core/source/device/device.h" @@ -48,7 +49,7 @@ ze_result_t CommandListImp::appendMetricQueryEnd(zet_metric_query_handle_t hMetr return MetricQuery::fromHandle(hMetricQuery)->appendEnd(*this, hCompletionEvent); } -CommandList *CommandList::create(uint32_t productFamily, Device *device) { +CommandList *CommandList::create(uint32_t productFamily, Device *device, bool isCopyOnly) { CommandListAllocatorFn allocator = nullptr; if (productFamily < IGFX_MAX_PRODUCT) { allocator = commandListFactory[productFamily]; @@ -58,24 +59,27 @@ CommandList *CommandList::create(uint32_t productFamily, Device *device) { if (allocator) { commandList = static_cast((*allocator)(CommandList::defaultNumIddsPerBlock)); - commandList->initialize(device); + commandList->initialize(device, isCopyOnly); } return commandList; } CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device, const ze_command_queue_desc_t *desc, - bool internalUsage) { + bool internalUsage, bool isCopyOnly) { auto deviceImp = static_cast(device); NEO::CommandStreamReceiver *csr = nullptr; if (internalUsage) { csr = deviceImp->neoDevice->getInternalEngine().commandStreamReceiver; + } else if (isCopyOnly) { + auto &selectorCopyEngine = deviceImp->neoDevice->getDeviceById(0)->getSelectorCopyEngine(); + csr = deviceImp->neoDevice->getDeviceById(0)->getEngine(NEO::EngineHelpers::getBcsEngineType(deviceImp->neoDevice->getHardwareInfo(), selectorCopyEngine), false).commandStreamReceiver; } else { csr = deviceImp->neoDevice->getDefaultEngine().commandStreamReceiver; } - auto commandQueue = CommandQueue::create(productFamily, device, csr, desc); + auto commandQueue = CommandQueue::create(productFamily, device, csr, desc, isCopyOnly); if (!commandQueue) { return nullptr; } @@ -89,7 +93,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device if (allocator) { commandList = static_cast((*allocator)(CommandList::commandListimmediateIddsPerBlock)); - commandList->initialize(device); + commandList->initialize(device, isCopyOnly); } if (!commandList) { diff --git a/level_zero/core/source/cmdqueue/cmdqueue.cpp b/level_zero/core/source/cmdqueue/cmdqueue.cpp index 94718d8db4..23414fc2ff 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue.cpp +++ b/level_zero/core/source/cmdqueue/cmdqueue.cpp @@ -28,13 +28,14 @@ ze_result_t CommandQueueImp::destroy() { return ZE_RESULT_SUCCESS; } -void CommandQueueImp::initialize() { +void CommandQueueImp::initialize(bool copyOnly) { buffers.initialize(device, totalCmdBufferSize); NEO::GraphicsAllocation *bufferAllocation = buffers.getCurrentBufferAllocation(); commandStream = new NEO::LinearStream(bufferAllocation->getUnderlyingBuffer(), defaultQueueCmdBufferSize); UNRECOVERABLE_IF(commandStream == nullptr); commandStream->replaceGraphicsAllocation(bufferAllocation); + isCopyOnlyCommandQueue = copyOnly; } void CommandQueueImp::reserveLinearStreamSize(size_t size) { @@ -91,7 +92,7 @@ void CommandQueueImp::printFunctionsPrintfOutput() { } CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr, - const ze_command_queue_desc_t *desc) { + const ze_command_queue_desc_t *desc, bool isCopyOnly) { CommandQueueAllocatorFn allocator = nullptr; if (productFamily < IGFX_MAX_PRODUCT) { allocator = commandQueueFactory[productFamily]; @@ -101,7 +102,7 @@ CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO:: if (allocator) { commandQueue = static_cast((*allocator)(device, csr, desc)); - commandQueue->initialize(); + commandQueue->initialize(isCopyOnly); } return commandQueue; } diff --git a/level_zero/core/source/cmdqueue/cmdqueue.h b/level_zero/core/source/cmdqueue/cmdqueue.h index b4c5658b16..bc457b7eef 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue.h +++ b/level_zero/core/source/cmdqueue/cmdqueue.h @@ -42,7 +42,7 @@ struct CommandQueue : _ze_command_queue_handle_t { virtual ze_result_t synchronize(uint32_t timeout) = 0; static CommandQueue *create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr, - const ze_command_queue_desc_t *desc); + const ze_command_queue_desc_t *desc, bool isCopyOnly); static CommandQueue *fromHandle(ze_command_queue_handle_t handle) { return static_cast(handle); @@ -58,6 +58,7 @@ struct CommandQueue : _ze_command_queue_handle_t { std::atomic commandQueuePerThreadScratchSize; NEO::PreemptionMode commandQueuePreemptionMode = NEO::PreemptionMode::Initial; bool commandQueueDebugCmdsProgrammed = false; + bool isCopyOnlyCommandQueue = false; }; using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr, diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 9e2b1353d7..a283fbaede 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -62,6 +62,13 @@ ze_result_t CommandQueueHw::executeCommandLists( using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + for (auto i = 0u; i < numCommandLists; i++) { + auto commandList = CommandList::fromHandle(phCommandLists[i]); + if (isCopyOnlyCommandQueue != commandList->isCopyOnly()) { + return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE; + } + } + size_t spaceForResidency = 0; size_t preemptionSize = 0u; size_t debuggerCmdsSize = 0; @@ -122,7 +129,7 @@ ze_result_t CommandQueueHw::executeCommandLists( if (hFence) { fence = Fence::fromHandle(hFence); spaceForResidency += residencyContainerSpaceForFence; - linearStreamSizeEstimate += NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo()); + linearStreamSizeEstimate += isCopyOnlyCommandQueue ? NEO::EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() : NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo()); } spaceForResidency += residencyContainerSpaceForTagWrite; @@ -138,70 +145,72 @@ ze_result_t CommandQueueHw::executeCommandLists( gsbaStateDirty |= !gsbaInit; frontEndStateDirty |= !frontEndInit; + if (!isCopyOnlyCommandQueue) { - if (!gpgpuEnabled) { - linearStreamSizeEstimate += estimatePipelineSelect(); + if (!gpgpuEnabled) { + linearStreamSizeEstimate += estimatePipelineSelect(); + } + + if (frontEndStateDirty) { + linearStreamSizeEstimate += estimateFrontEndCmdSize(); + } + + if (gsbaStateDirty) { + linearStreamSizeEstimate += estimateStateBaseAddressCmdSize(); + } + + linearStreamSizeEstimate += preemptionSize + debuggerCmdsSize; } - if (frontEndStateDirty) { - linearStreamSizeEstimate += estimateFrontEndCmdSize(); - } - - if (gsbaStateDirty) { - linearStreamSizeEstimate += estimateStateBaseAddressCmdSize(); - } - - linearStreamSizeEstimate += NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo()); - - linearStreamSizeEstimate += preemptionSize + debuggerCmdsSize; - + linearStreamSizeEstimate += isCopyOnlyCommandQueue ? NEO::EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() : NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo()); size_t alignedSize = alignUp(linearStreamSizeEstimate, minCmdBufferPtrAlign); size_t padding = alignedSize - linearStreamSizeEstimate; reserveLinearStreamSize(alignedSize); NEO::LinearStream child(commandStream->getSpace(alignedSize), alignedSize); - if (!gpgpuEnabled) { - programPipelineSelect(child); - } + if (!isCopyOnlyCommandQueue) { + if (!gpgpuEnabled) { + programPipelineSelect(child); + } - if (!commandQueueDebugCmdsProgrammed && neoDevice->isDebuggerActive()) { - NEO::PreambleHelper::programKernelDebugging(&child); - commandQueueDebugCmdsProgrammed = true; - } + if (!commandQueueDebugCmdsProgrammed && neoDevice->isDebuggerActive()) { + NEO::PreambleHelper::programKernelDebugging(&child); + commandQueueDebugCmdsProgrammed = true; + } - if (frontEndStateDirty) { - programFrontEnd(scratchSpaceController->getScratchPatchAddress(), child); - } - if (gsbaStateDirty) { - programGeneralStateBaseAddress(scratchSpaceController->calculateNewGSH(), child); - } + if (frontEndStateDirty) { + programFrontEnd(scratchSpaceController->getScratchPatchAddress(), child); + } + if (gsbaStateDirty) { + programGeneralStateBaseAddress(scratchSpaceController->calculateNewGSH(), child); + } - if (commandQueuePreemptionMode == NEO::PreemptionMode::Initial) { - NEO::PreemptionHelper::programCsrBaseAddress(child, *neoDevice, csr->getPreemptionAllocation()); - NEO::PreemptionHelper::programStateSip(child, *neoDevice); - NEO::PreemptionHelper::programCmdStream(child, - devicePreemption, - commandQueuePreemptionMode, - csr->getPreemptionAllocation()); - commandQueuePreemptionMode = devicePreemption; - statePreemption = commandQueuePreemptionMode; - } + if (commandQueuePreemptionMode == NEO::PreemptionMode::Initial) { + NEO::PreemptionHelper::programCsrBaseAddress(child, *neoDevice, csr->getPreemptionAllocation()); + NEO::PreemptionHelper::programStateSip(child, *neoDevice); + NEO::PreemptionHelper::programCmdStream(child, + devicePreemption, + commandQueuePreemptionMode, + csr->getPreemptionAllocation()); + commandQueuePreemptionMode = devicePreemption; + statePreemption = commandQueuePreemptionMode; + } - const bool sipKernelUsed = devicePreemption == NEO::PreemptionMode::MidThread || - neoDevice->isDebuggerActive(); - if (devicePreemption == NEO::PreemptionMode::MidThread) { - residencyContainer.push_back(csr->getPreemptionAllocation()); - } + const bool sipKernelUsed = devicePreemption == NEO::PreemptionMode::MidThread || + neoDevice->isDebuggerActive(); + if (devicePreemption == NEO::PreemptionMode::MidThread) { + residencyContainer.push_back(csr->getPreemptionAllocation()); + } - if (sipKernelUsed) { - auto sipIsa = NEO::SipKernel::getSipKernelAllocation(*neoDevice); - residencyContainer.push_back(sipIsa); - } + if (sipKernelUsed) { + auto sipIsa = NEO::SipKernel::getSipKernelAllocation(*neoDevice); + residencyContainer.push_back(sipIsa); + } - if (neoDevice->isDebuggerActive()) { - residencyContainer.push_back(device->getDebugSurface()); + if (neoDevice->isDebuggerActive()) { + residencyContainer.push_back(device->getDebugSurface()); + } } - for (auto i = 0u; i < numCommandLists; ++i) { auto commandList = CommandList::fromHandle(phCommandLists[i]); auto cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations(); @@ -254,12 +263,17 @@ ze_result_t CommandQueueHw::executeCommandLists( if (hFence) { residencyContainer.push_back(&fence->getAllocation()); - NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( - child, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - fence->getGpuAddress(), Fence::STATE_SIGNALED, true, device->getHwInfo()); + if (isCopyOnlyCommandQueue) { + NEO::EncodeMiFlushDW::programMiFlushDw(child, fence->getGpuAddress(), Fence::STATE_SIGNALED, false, true); + } else { + NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( + child, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + fence->getGpuAddress(), Fence::STATE_SIGNALED, true, device->getHwInfo()); + } } dispatchTaskCountWrite(child, true); + residencyContainer.push_back(csr->getTagAllocation()); void *endingCmd = nullptr; if (directSubmissionEnabled) { @@ -334,8 +348,12 @@ void CommandQueueHw::dispatchTaskCountWrite(NEO::LinearStream &co auto taskCountToWrite = csr->peekTaskCount() + 1; auto gpuAddress = static_cast(csr->getTagAllocation()->getGpuAddress()); - NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( - commandStream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - gpuAddress, taskCountToWrite, true, device->getHwInfo()); + if (isCopyOnlyCommandQueue) { + NEO::EncodeMiFlushDW::programMiFlushDw(commandStream, gpuAddress, taskCountToWrite, false, true); + } else { + NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( + commandStream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + gpuAddress, taskCountToWrite, true, device->getHwInfo()); + } } } // namespace L0 diff --git a/level_zero/core/source/cmdqueue/cmdqueue_imp.h b/level_zero/core/source/cmdqueue/cmdqueue_imp.h index 512f5c1818..b2eb7ba8af 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_imp.h +++ b/level_zero/core/source/cmdqueue/cmdqueue_imp.h @@ -68,7 +68,7 @@ struct CommandQueueImp : public CommandQueue { ze_result_t synchronize(uint32_t timeout) override; - void initialize(); + void initialize(bool copyOnly); Device *getDevice() { return device; } diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index 3afe580017..82cedee028 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -15,6 +15,7 @@ #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/constants.h" +#include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/string.h" #include "shared/source/kernel/grf_config.h" @@ -77,7 +78,12 @@ ze_result_t DeviceImp::canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t * ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc, ze_command_list_handle_t *commandList) { auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; - *commandList = CommandList::create(productFamily, this); + bool useBliter = false; + auto ret = isCreatedCommandListCopyOnly(desc, &useBliter, ZE_COMMAND_LIST_FLAG_COPY_ONLY); + if (ret != ZE_RESULT_SUCCESS) { + return ret; + } + *commandList = CommandList::create(productFamily, this, useBliter); return ZE_RESULT_SUCCESS; } @@ -85,7 +91,14 @@ ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc, ze_result_t DeviceImp::createCommandListImmediate(const ze_command_queue_desc_t *desc, ze_command_list_handle_t *phCommandList) { auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; - *phCommandList = CommandList::createImmediate(productFamily, this, desc, false); + + bool useBliter = false; + auto ret = isCreatedCommandListCopyOnly(desc, &useBliter, ZE_COMMAND_QUEUE_FLAG_COPY_ONLY); + if (ret != ZE_RESULT_SUCCESS) { + return ret; + } + + *phCommandList = CommandList::createImmediate(productFamily, this, desc, false, useBliter); return ZE_RESULT_SUCCESS; } @@ -94,9 +107,19 @@ ze_result_t DeviceImp::createCommandQueue(const ze_command_queue_desc_t *desc, ze_command_queue_handle_t *commandQueue) { auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; - auto csr = neoDevice->getDefaultEngine().commandStreamReceiver; - - *commandQueue = CommandQueue::create(productFamily, this, csr, desc); + NEO::CommandStreamReceiver *csr = nullptr; + bool useBliter = false; + auto ret = isCreatedCommandListCopyOnly(desc, &useBliter, ZE_COMMAND_QUEUE_FLAG_COPY_ONLY); + if (ret != ZE_RESULT_SUCCESS) { + return ret; + } + if (useBliter) { + auto &selectorCopyEngine = this->neoDevice->getDeviceById(0)->getSelectorCopyEngine(); + csr = this->neoDevice->getDeviceById(0)->getEngine(NEO::EngineHelpers::getBcsEngineType(neoDevice->getHardwareInfo(), selectorCopyEngine), false).commandStreamReceiver; + } else { + csr = neoDevice->getDefaultEngine().commandStreamReceiver; + } + *commandQueue = CommandQueue::create(productFamily, this, csr, desc, useBliter); return ZE_RESULT_SUCCESS; } @@ -515,7 +538,7 @@ ze_result_t DeviceImp::registerCLCommandQueue(cl_context context, cl_command_que auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; auto csr = neoDevice->getDefaultEngine().commandStreamReceiver; - *phCommandQueue = CommandQueue::create(productFamily, this, csr, &desc); + *phCommandQueue = CommandQueue::create(productFamily, this, csr, &desc, false); return ZE_RESULT_SUCCESS; } @@ -574,7 +597,7 @@ Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice) { cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; device->pageFaultCommandList = CommandList::createImmediate( - device->neoDevice->getHardwareInfo().platform.eProductFamily, device, &cmdQueueDesc, true); + device->neoDevice->getHardwareInfo().platform.eProductFamily, device, &cmdQueueDesc, true, false); } if (neoDevice->getDeviceInfo().debuggerActive) { @@ -696,4 +719,17 @@ NEO::GraphicsAllocation *DeviceImp::allocateMemoryFromHostPtr(const void *buffer return allocation; } +template +ze_result_t DeviceImp::isCreatedCommandListCopyOnly(const DescriptionType *desc, bool *useBliter, ExpectedFlagType flag) { + if (desc->flags & flag) { + auto hwInfo = neoDevice->getHardwareInfo(); + if (hwInfo.capabilityTable.blitterOperationsSupported) { + *useBliter = true; + return ZE_RESULT_SUCCESS; + } + return ZE_RESULT_ERROR_INVALID_ENUMERATION; + } + return ZE_RESULT_SUCCESS; +} + } // namespace L0 diff --git a/level_zero/core/source/device/device_imp.h b/level_zero/core/source/device/device_imp.h index 836e8484e3..cc4e93a9d8 100644 --- a/level_zero/core/source/device/device_imp.h +++ b/level_zero/core/source/device/device_imp.h @@ -93,6 +93,8 @@ struct DeviceImp : public Device { CommandList *pageFaultCommandList = nullptr; protected: + template + ze_result_t isCreatedCommandListCopyOnly(const DescriptionType *desc, bool *useBliter, ExpectedFlagType flag); NEO::GraphicsAllocation *debugSurface = nullptr; }; diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index aaea8aeeaa..f47839e064 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -50,7 +50,7 @@ struct Event : _ze_event_handle_t { inline ze_event_handle_t toHandle() { return this; } - NEO::GraphicsAllocation &getAllocation(); + virtual NEO::GraphicsAllocation &getAllocation(); uint64_t getGpuAddress() { return gpuAddress; } uint64_t getOffsetOfEventTimestampRegister(uint32_t eventTimestampReg); @@ -97,7 +97,7 @@ struct EventPool : _ze_event_pool_handle_t { inline ze_event_pool_handle_t toHandle() { return this; } - NEO::GraphicsAllocation &getAllocation() { return *eventPoolAllocation; } + virtual NEO::GraphicsAllocation &getAllocation() { return *eventPoolAllocation; } virtual uint32_t getEventSize() = 0; virtual uint32_t getNumEventTimestampsToRead() = 0; diff --git a/level_zero/core/test/unit_tests/gen9/test_cmdlist_append_launch_kernel_gen9.cpp b/level_zero/core/test/unit_tests/gen9/test_cmdlist_append_launch_kernel_gen9.cpp index 499b44fd3e..b8b96b43d3 100644 --- a/level_zero/core/test/unit_tests/gen9/test_cmdlist_append_launch_kernel_gen9.cpp +++ b/level_zero/core/test/unit_tests/gen9/test_cmdlist_append_launch_kernel_gen9.cpp @@ -22,7 +22,7 @@ using IsSKLOrKBL = IsWithinProducts; HWTEST2_F(CommandListAppendLaunchKernel, givenKernelWithSLMThenL3IsProgrammedWithSLMValue, IsSKLOrKBL) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; createKernel(); - std::unique_ptr commandList(CommandList::create(productFamily, device)); + std::unique_ptr commandList(CommandList::create(productFamily, device, false)); ze_group_count_t groupCount{1, 1, 1}; EXPECT_LE(0u, kernel->kernelImmData->getDescriptor().kernelAttributes.slmInlineSize); diff --git a/level_zero/core/test/unit_tests/mocks/CMakeLists.txt b/level_zero/core/test/unit_tests/mocks/CMakeLists.txt index 804f72bb33..7d7f0068c8 100644 --- a/level_zero/core/test/unit_tests/mocks/CMakeLists.txt +++ b/level_zero/core/test/unit_tests/mocks/CMakeLists.txt @@ -20,6 +20,8 @@ set(L0_MOCKS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/mock_driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_driver_handle.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_driver_handle.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/mock_event.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/mock_event.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_kernel.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_manager.cpp diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index d546040b50..b097c787c0 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -32,6 +32,7 @@ struct WhiteBox<::L0::CommandListCoreFamily> using BaseClass = ::L0::CommandListCoreFamily; using BaseClass::appendLaunchKernelWithParams; using BaseClass::commandListPreemptionMode; + using BaseClass::getAlignedAllocation; WhiteBox() : ::L0::CommandListCoreFamily(BaseClass::defaultNumIddsPerBlock) {} virtual ~WhiteBox() {} @@ -149,7 +150,7 @@ struct Mock : public CommandList { MOCK_METHOD0(appendMIBBEnd, ze_result_t()); MOCK_METHOD0(appendMINoop, ze_result_t()); MOCK_METHOD1(executeCommandListImmediate, ze_result_t(bool perforMigration)); - MOCK_METHOD1(initialize, bool(L0::Device *device)); + MOCK_METHOD2(initialize, bool(L0::Device *device, bool onlyCopyBlit)); uint8_t *batchBuffer = nullptr; NEO::GraphicsAllocation *mockAllocation = nullptr; diff --git a/level_zero/core/test/unit_tests/mocks/mock_event.cpp b/level_zero/core/test/unit_tests/mocks/mock_event.cpp new file mode 100644 index 0000000000..9bf9dcb8eb --- /dev/null +++ b/level_zero/core/test/unit_tests/mocks/mock_event.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2020 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "mock_event.h" + +#include + +namespace L0 { +namespace ult { + +Mock::Mock() : mockAllocation(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, + &memory, reinterpret_cast(&memory), 0, sizeof(memory), + MemoryPool::System4KBPages) { allocation = &mockAllocation; } + +Mock::~Mock() {} + +Mock::Mock() : pool(1) { + pool = std::vector(1); + pool[0] = 0; + + EXPECT_CALL(*this, getPoolSize()).WillRepeatedly(testing::Return(1)); +} + +Mock::~Mock() { pool.clear(); } + +} // namespace ult +} // namespace L0 diff --git a/level_zero/core/test/unit_tests/mocks/mock_event.h b/level_zero/core/test/unit_tests/mocks/mock_event.h new file mode 100644 index 0000000000..a8fa8f752f --- /dev/null +++ b/level_zero/core/test/unit_tests/mocks/mock_event.h @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2020 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "level_zero/core/source/device/device.h" +#include "level_zero/core/source/event/event.h" +#include "level_zero/core/test/unit_tests/mock.h" +#include "level_zero/core/test/unit_tests/white_box.h" + +#include + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Winconsistent-missing-override" +#endif + +namespace L0 { +namespace ult { + +template <> +struct WhiteBox<::L0::Event> : public ::L0::Event { + using BaseClass = ::L0::Event; + using BaseClass::allocation; +}; + +using Event = WhiteBox<::L0::Event>; + +template <> +struct WhiteBox<::L0::EventPool> : public ::L0::EventPool { + using BaseClass = ::L0::EventPool; +}; + +using EventPool = WhiteBox<::L0::EventPool>; + +template <> +struct Mock : public Event { + Mock(); + ~Mock() override; + + MOCK_METHOD3(create, L0::Event *(::L0::EventPool *eventPool, const ze_event_desc_t *desc, ::L0::Device *device)); + MOCK_METHOD0(destroy, ze_result_t()); + MOCK_METHOD0(hostSignal, ze_result_t()); + MOCK_METHOD1(hostSynchronize, ze_result_t(uint32_t timeout)); + MOCK_METHOD0(queryStatus, ze_result_t()); + MOCK_METHOD0(reset, ze_result_t()); + MOCK_METHOD2(getTimestamp, ze_result_t(ze_event_timestamp_type_t timestampType, void *dstptr)); + + // Fake an allocation for event memory + alignas(16) uint32_t memory = -1; + NEO::GraphicsAllocation mockAllocation; + + using Event::allocation; +}; + +template <> +struct Mock : public EventPool { + Mock(); + ~Mock() override; + + MOCK_METHOD0(destroy, ze_result_t()); + MOCK_METHOD0(getPoolSize, size_t()); + MOCK_METHOD0(getPoolUsedCount, uint32_t()); + MOCK_METHOD1(getIpcHandle, ze_result_t(ze_ipc_event_pool_handle_t *pIpcHandle)); + MOCK_METHOD0(closeIpcHandle, ze_result_t()); + MOCK_METHOD2(createEvent, ze_result_t(const ze_event_desc_t *desc, ze_event_handle_t *phEvent)); + MOCK_METHOD2(reserveEventFromPool, ze_result_t(int index, ::L0::Event *event)); + MOCK_METHOD1(releaseEventToPool, ze_result_t(::L0::Event *event)); + MOCK_METHOD0(getDevice, Device *()); + MOCK_METHOD0(getEventSize, uint32_t()); + MOCK_METHOD0(getNumEventTimestampsToRead, uint32_t()); + + std::vector pool; + + using EventPool::eventPoolAllocation; +}; + +} // namespace ult +} // namespace L0 + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp index cff9f4aa89..8f2a3961d8 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp @@ -6,11 +6,16 @@ */ #include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/source/helpers/hw_info.h" #include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h" +#include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "test.h" +#include "level_zero/core/source/cmdlist/cmdlist_hw.h" +#include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" +#include "level_zero/core/test/unit_tests/mocks/mock_event.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" namespace L0 { @@ -32,7 +37,7 @@ TEST(zeCommandListCreateImmediate, redirectsToObject) { } TEST_F(CommandListCreate, whenCommandListIsCreatedThenItIsInitialized) { - std::unique_ptr commandList(CommandList::create(productFamily, device)); + std::unique_ptr commandList(CommandList::create(productFamily, device, false)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device, commandList->device); @@ -57,7 +62,7 @@ TEST_F(CommandListCreate, whenCommandListIsCreatedThenItIsInitialized) { } TEST_F(CommandListCreate, givenRegularCommandListThenDefaultNumIddPerBlockIsUsed) { - std::unique_ptr commandList(CommandList::create(productFamily, device)); + std::unique_ptr commandList(CommandList::create(productFamily, device, false)); ASSERT_NE(nullptr, commandList); const uint32_t defaultNumIdds = CommandList::defaultNumIddsPerBlock; @@ -71,7 +76,7 @@ TEST_F(CommandListCreate, givenImmediateCommandListThenCustomNumIddPerBlockUsed) ZE_COMMAND_QUEUE_MODE_DEFAULT, ZE_COMMAND_QUEUE_PRIORITY_NORMAL, 0}; - std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false)); + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, false)); ASSERT_NE(nullptr, commandList); const uint32_t cmdListImmediateIdds = CommandList::commandListimmediateIddsPerBlock; @@ -85,7 +90,7 @@ TEST_F(CommandListCreate, whenCreatingImmediateCommandListThenItHasImmediateComm ZE_COMMAND_QUEUE_MODE_DEFAULT, ZE_COMMAND_QUEUE_PRIORITY_NORMAL, 0}; - std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false)); + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, false)); ASSERT_NE(nullptr, commandList); EXPECT_EQ(device, commandList->device); @@ -94,14 +99,14 @@ TEST_F(CommandListCreate, whenCreatingImmediateCommandListThenItHasImmediateComm } TEST_F(CommandListCreate, givenInvalidProductFamilyThenReturnsNullPointer) { - std::unique_ptr commandList(CommandList::create(IGFX_UNKNOWN, device)); + std::unique_ptr commandList(CommandList::create(IGFX_UNKNOWN, device, false)); EXPECT_EQ(nullptr, commandList); } HWTEST_F(CommandListCreate, whenCommandListIsCreatedThenStateBaseAddressCmdIsAddedAndCorrectlyProgrammed) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; - std::unique_ptr commandList(CommandList::create(productFamily, device)); + std::unique_ptr commandList(CommandList::create(productFamily, device, false)); auto &commandContainer = commandList->commandContainer; auto gmmHelper = commandContainer.getDevice()->getGmmHelper(); @@ -141,5 +146,265 @@ HWTEST_F(CommandListCreate, whenCommandListIsCreatedThenStateBaseAddressCmdIsAdd EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER), cmdSba->getStatelessDataPortAccessMemoryObjectControlState()); } +HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenCreatedThenStateBaseAddressCmdIsNotProgrammed) { + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + + std::unique_ptr commandList(CommandList::create(productFamily, device, true)); + auto &commandContainer = commandList->commandContainer; + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + + EXPECT_EQ(cmdList.end(), itor); +} + +HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenSetBarrierThenMiFlushDWIsProgrammed) { + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + std::unique_ptr commandList(CommandList::create(productFamily, device, true)); + auto &commandContainer = commandList->commandContainer; + commandList->appendBarrier(nullptr, 0, nullptr); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + + EXPECT_NE(cmdList.end(), itor); +} + +HWTEST_F(CommandListCreate, givenCommandListWhenSetBarrierThenPipeControlIsProgrammed) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + std::unique_ptr commandList(CommandList::create(productFamily, device, false)); + auto &commandContainer = commandList->commandContainer; + commandList->appendBarrier(nullptr, 0, nullptr); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + + EXPECT_NE(cmdList.end(), itor); +} +template +class MockCommandList : public WhiteBox<::L0::CommandListCoreFamily> { + public: + MockCommandList() : WhiteBox<::L0::CommandListCoreFamily>(1) {} + + AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override { + return {0, 0, nullptr, true}; + } + ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, + NEO::GraphicsAllocation *dstPtrAlloc, + uint64_t dstOffset, + void *srcPtr, + NEO::GraphicsAllocation *srcPtrAlloc, + uint64_t srcOffset, + uint32_t size, + uint32_t elementSize, + Builtin builtin) override { + appendMemoryCopyKernelWithGACalledTimes++; + return ZE_RESULT_SUCCESS; + } + virtual ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc, + uint64_t dstOffset, + NEO::GraphicsAllocation *srcPtrAlloc, + uint64_t srcOffset, uint32_t size) override { + appendMemoryCopyBlitCalledTimes++; + return ZE_RESULT_SUCCESS; + } + + virtual ze_result_t appendMemoryCopyBlitRegion(const void *srcptr, + const void *dstptr, + ze_copy_region_t srcRegion, + ze_copy_region_t dstRegion, Vec3 copySize, + size_t srcRowPitch, size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, + size_t srcSize, size_t dstSize) override { + appendMemoryCopyBlitRegionCalledTimes++; + return ZE_RESULT_SUCCESS; + } + + virtual ze_result_t appendMemoryCopyKernel2d(const void *dstptr, const void *srcptr, + Builtin builtin, const ze_copy_region_t *dstRegion, + uint32_t dstPitch, size_t dstOffset, + const ze_copy_region_t *srcRegion, uint32_t srcPitch, + size_t srcOffset, ze_event_handle_t hSignalEvent, + uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override { + appendMemoryCopyKernel2dCalledTimes++; + return ZE_RESULT_SUCCESS; + } + + virtual ze_result_t appendMemoryCopyKernel3d(const void *dstptr, const void *srcptr, + Builtin builtin, const ze_copy_region_t *dstRegion, + uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset, + const ze_copy_region_t *srcRegion, uint32_t srcPitch, + uint32_t srcSlicePitch, size_t srcOffset, + ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) override { + appendMemoryCopyKernel3dalledTimes++; + return ZE_RESULT_SUCCESS; + } + uint32_t appendMemoryCopyKernelWithGACalledTimes = 0; + uint32_t appendMemoryCopyBlitCalledTimes = 0; + uint32_t appendMemoryCopyBlitRegionCalledTimes = 0; + uint32_t appendMemoryCopyKernel2dCalledTimes = 0; + uint32_t appendMemoryCopyKernel3dalledTimes = 0; +}; + +using Platforms = IsAtLeastProduct; + +HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyCalledThenAppendMemoryCopyWithappendMemoryCopyKernelWithGACalled, Platforms) { + MockCommandList cmdList; + cmdList.initialize(device, false); + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + cmdList.appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 0, nullptr); + EXPECT_GT(cmdList.appendMemoryCopyKernelWithGACalledTimes, 0u); + EXPECT_EQ(cmdList.appendMemoryCopyBlitCalledTimes, 0u); +} + +HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyCalledThenAppendMemoryCopyWithappendMemoryCopyWithBliterCalled, Platforms) { + MockCommandList cmdList; + cmdList.initialize(device, true); + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + cmdList.appendMemoryCopy(dstPtr, srcPtr, 0x1001, nullptr, 0, nullptr); + EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalledTimes, 0u); + EXPECT_GT(cmdList.appendMemoryCopyBlitCalledTimes, 0u); +} + +class MockDriverHandle : public L0::DriverHandleImp { + public: + bool findAllocationDataForRange(const void *buffer, + size_t size, + NEO::SvmAllocationData **allocData) override { + if (allocData) { + *allocData = &data; + } + return true; + } + NEO::SvmAllocationData data = {}; +}; + +HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionCalledThenAppendMemoryCopyWithappendMemoryCopyWithBliterCalled, Platforms) { + MockCommandList cmdList; + cmdList.initialize(device, true); + MockDriverHandle driverHandle; + device->setDriverHandle(&driverHandle); + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + ze_copy_region_t dstRegion = {}; + ze_copy_region_t srcRegion = {}; + cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr); + EXPECT_GT(cmdList.appendMemoryCopyBlitRegionCalledTimes, 0u); +} + +HWTEST2_F(CommandListCreate, givenCommandListAnd3DWhbufferenMemoryCopyRegionCalledThenCopyKernel3DCalled, Platforms) { + MockCommandList cmdList; + cmdList.initialize(device, false); + MockDriverHandle driverHandle; + device->setDriverHandle(&driverHandle); + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2}; + ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2}; + cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr); + EXPECT_EQ(cmdList.appendMemoryCopyBlitRegionCalledTimes, 0u); + EXPECT_GT(cmdList.appendMemoryCopyKernel3dalledTimes, 0u); +} + +HWTEST2_F(CommandListCreate, givenCommandListAnd2DWhbufferenMemoryCopyRegionCalledThenCopyKernel2DCalled, Platforms) { + MockCommandList cmdList; + cmdList.initialize(device, false); + MockDriverHandle driverHandle; + device->setDriverHandle(&driverHandle); + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 1}; + ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 1}; + cmdList.appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr); + EXPECT_EQ(cmdList.appendMemoryCopyBlitRegionCalledTimes, 0u); + EXPECT_GT(cmdList.appendMemoryCopyKernel2dCalledTimes, 0u); +} + +class MockEvent : public Mock { + public: + MockEvent() { + mockAllocation.reset(new NEO::MockGraphicsAllocation(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY, + reinterpret_cast(0x1234), 0x1000, 0, sizeof(uint32_t), + MemoryPool::System4KBPages)); + gpuAddress = mockAllocation->getGpuAddress(); + } + NEO::GraphicsAllocation &getAllocation() override { + return *mockAllocation.get(); + } + std::unique_ptr mockAllocation; +}; + +HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendSignalEventThenMiFlushDWIsProgrammed) { + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + std::unique_ptr commandList(CommandList::create(productFamily, device, true)); + auto &commandContainer = commandList->commandContainer; + MockEvent event; + event.waitScope = ZE_EVENT_SCOPE_FLAG_NONE; + event.signalScope = ZE_EVENT_SCOPE_FLAG_NONE; + commandList->appendSignalEvent(event.toHandle()); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + + EXPECT_NE(cmdList.end(), itor); +} + +HWTEST_F(CommandListCreate, givenCommandListyWhenAppendSignalEventThePipeControlIsProgrammed) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + std::unique_ptr commandList(CommandList::create(productFamily, device, false)); + auto &commandContainer = commandList->commandContainer; + MockEvent event; + event.waitScope = ZE_EVENT_SCOPE_FLAG_NONE; + event.signalScope = ZE_EVENT_SCOPE_FLAG_NONE; + commandList->appendSignalEvent(event.toHandle()); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + + EXPECT_NE(cmdList.end(), itor); +} +HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendWaitEventsWithDcFlushThenMiFlushDWIsProgrammed) { + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + std::unique_ptr commandList(CommandList::create(productFamily, device, true)); + auto &commandContainer = commandList->commandContainer; + MockEvent event; + event.signalScope = ZE_EVENT_SCOPE_FLAG_NONE; + event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST; + auto eventHandle = event.toHandle(); + commandList->appendWaitOnEvents(1, &eventHandle); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + + EXPECT_NE(cmdList.end(), itor); +} + +HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThePipeControlIsProgrammed) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + std::unique_ptr commandList(CommandList::create(productFamily, device, false)); + auto &commandContainer = commandList->commandContainer; + MockEvent event; + event.signalScope = ZE_EVENT_SCOPE_FLAG_NONE; + event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST; + auto eventHandle = event.toHandle(); + commandList->appendWaitOnEvents(1, &eventHandle); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + + EXPECT_NE(cmdList.end(), itor); +} + } // namespace ult -} // namespace L0 \ No newline at end of file +} // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp index b24118fa79..484c3bcc53 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp @@ -23,7 +23,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenAp using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; createKernel(); - std::unique_ptr commandList(CommandList::create(productFamily, device)); + std::unique_ptr commandList(CommandList::create(productFamily, device, false)); auto &commandContainer = commandList->commandContainer; const auto stream = commandContainer.getCommandStream(); @@ -59,7 +59,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenFunctionWhenBind createKernel(); ze_group_count_t groupCount{1, 1, 1}; - std::unique_ptr commandList(CommandList::create(productFamily, device)); + std::unique_ptr commandList(CommandList::create(productFamily, device, false)); commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); auto commandStream = commandList->commandContainer.getCommandStream(); @@ -86,7 +86,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenFunctionWhenBind HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToCommandListThenKernelIsStored) { createKernel(); - std::unique_ptr commandList(CommandList::create(productFamily, device)); + std::unique_ptr commandList(CommandList::create(productFamily, device, false)); ze_group_count_t groupCount{1, 1, 1}; EXPECT_TRUE(kernel->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf); @@ -100,7 +100,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToCommandListMultipleTimesThenKernelIsStoredOnce) { createKernel(); - std::unique_ptr commandList(CommandList::create(productFamily, device)); + std::unique_ptr commandList(CommandList::create(productFamily, device, false)); ze_group_count_t groupCount{1, 1, 1}; EXPECT_TRUE(kernel->kernelImmData->getDescriptor().kernelAttributes.flags.usesPrintf); @@ -118,7 +118,7 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC HWTEST_F(CommandListAppendLaunchKernel, WhenAppendingMultipleTimesThenSshIsNotDepletedButReallocated) { createKernel(); - std::unique_ptr commandList(CommandList::create(productFamily, device)); + std::unique_ptr commandList(CommandList::create(productFamily, device, false)); ze_group_count_t groupCount{1, 1, 1}; auto kernelSshSize = kernel->getSurfaceStateHeapDataSize(); @@ -143,7 +143,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, WhenAppendingFunctionThenUsedCmdBufferS ze_group_count_t groupCount{1, 1, 1}; auto commandList = std::make_unique>>(); - bool ret = commandList->initialize(device); + bool ret = commandList->initialize(device, false); ASSERT_TRUE(ret); auto sizeBefore = commandList->commandContainer.getCommandStream()->getUsed(); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp index 7078ddf856..9a0d3deb10 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp @@ -9,6 +9,7 @@ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/default_hw_info.h" +#include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" #include "test.h" #include "level_zero/core/source/driver/driver_handle_imp.h" @@ -34,7 +35,8 @@ TEST_F(CommandQueueCreate, whenCreatingCommandQueueThenItIsInitialized) { L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, device, csr.get(), - &desc); + &desc, + false); ASSERT_NE(nullptr, commandQueue); L0::CommandQueueImp *commandQueueImp = reinterpret_cast(commandQueue); @@ -90,7 +92,7 @@ HWTEST2_F(CommandQueueProgramSBATest, whenCreatingCommandQueueThenItIsInitialize desc.version = ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT; auto csr = std::unique_ptr(neoDevice->createCommandStreamReceiver()); auto commandQueue = new MockCommandQueueHw(device, csr.get(), &desc); - commandQueue->initialize(); + commandQueue->initialize(false); uint32_t alignedSize = 4096u; NEO::LinearStream child(commandQueue->commandStream->getSpace(alignedSize), alignedSize); @@ -103,5 +105,56 @@ HWTEST2_F(CommandQueueProgramSBATest, whenCreatingCommandQueueThenItIsInitialize commandQueue->destroy(); } +TEST_F(CommandQueueCreate, givenCmdQueueWithBlitCopyWhenExecutingNonCopyBlitCommandListThenWrongCommandListStatusReturned) { + const ze_command_queue_desc_t desc = { + ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT, + ZE_COMMAND_QUEUE_FLAG_COPY_ONLY, + ZE_COMMAND_QUEUE_MODE_DEFAULT, + ZE_COMMAND_QUEUE_PRIORITY_NORMAL, + 0}; + + auto csr = std::unique_ptr(neoDevice->createCommandStreamReceiver()); + + L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, + device, + csr.get(), + &desc, + true); + ASSERT_NE(nullptr, commandQueue); + + std::unique_ptr commandList(CommandList::create(productFamily, device, false)); + auto commandListHandle = commandList->toHandle(); + auto status = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); + + EXPECT_EQ(status, ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE); + + commandQueue->destroy(); +} + +TEST_F(CommandQueueCreate, givenCmdQueueWithBlitCopyWhenExecutingCopyBlitCommandListThenSuccessReturned) { + const ze_command_queue_desc_t desc = { + ZE_COMMAND_QUEUE_DESC_VERSION_CURRENT, + ZE_COMMAND_QUEUE_FLAG_COPY_ONLY, + ZE_COMMAND_QUEUE_MODE_DEFAULT, + ZE_COMMAND_QUEUE_PRIORITY_NORMAL, + 0}; + + auto defaultCsr = neoDevice->getDefaultEngine().commandStreamReceiver; + L0::CommandQueue *commandQueue = CommandQueue::create(productFamily, + device, + defaultCsr, + &desc, + true); + ASSERT_NE(nullptr, commandQueue); + + std::unique_ptr commandList(CommandList::create(productFamily, device, true)); + auto commandListHandle = commandList->toHandle(); + auto status = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); + + EXPECT_EQ(status, ZE_RESULT_SUCCESS); + + commandQueue->destroy(); +} + } // namespace ult } // namespace L0 \ No newline at end of file diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_debugger.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_debugger.cpp index be7f725705..71cc591e2e 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/test_debugger.cpp +++ b/level_zero/core/test/unit_tests/sources/debugger/test_debugger.cpp @@ -72,13 +72,13 @@ using CommandQueueDebugCommandsTest = Test; HWTEST_F(CommandQueueDebugCommandsTest, givenDebuggingEnabledWhenCommandListIsExecutedThenKernelDebugCommandsAreAdded) { ze_command_queue_desc_t queueDesc = {}; - auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, deviceL0, device->getDefaultEngine().commandStreamReceiver, &queueDesc)); + auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, deviceL0, device->getDefaultEngine().commandStreamReceiver, &queueDesc, false)); ASSERT_NE(nullptr, commandQueue->commandStream); auto usedSpaceBefore = commandQueue->commandStream->getUsed(); ze_command_list_handle_t commandLists[] = { - CommandList::create(productFamily, deviceL0)->toHandle()}; + CommandList::create(productFamily, deviceL0, false)->toHandle()}; uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 986b321645..883db15207 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -224,7 +224,7 @@ struct EncodeBatchBufferStartOrEnd { template struct EncodeMiFlushDW { using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW; - static void programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData); + static void programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData, bool timeStampOperation, bool commandWithPostSync); static void programMiFlushDwWA(LinearStream &commandStream); static void appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd); static size_t getMiFlushDwCmdSizeForDataWrite(); diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index 76f501fa49..7f0181b1b3 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -395,14 +395,17 @@ void EncodeSurfaceState::getSshAlignedPointer(uintptr_t &ptr, size_t &of } template -void EncodeMiFlushDW::programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData) { +void EncodeMiFlushDW::programMiFlushDw(LinearStream &commandStream, uint64_t immediateDataGpuAddress, uint64_t immediateData, bool timeStampOperation, bool commandWithPostSync) { programMiFlushDwWA(commandStream); auto miFlushDwCmd = commandStream.getSpaceForCmd(); *miFlushDwCmd = GfxFamily::cmdInitMiFlushDw; - miFlushDwCmd->setPostSyncOperation(MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD); - miFlushDwCmd->setDestinationAddress(immediateDataGpuAddress); - miFlushDwCmd->setImmediateData(immediateData); + if (commandWithPostSync) { + auto postSyncType = timeStampOperation ? MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_TIMESTAMP_REGISTER : MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD; + miFlushDwCmd->setPostSyncOperation(postSyncType); + miFlushDwCmd->setDestinationAddress(immediateDataGpuAddress); + miFlushDwCmd->setImmediateData(immediateData); + } appendMiFlushDw(miFlushDwCmd); } diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 43ba8bd8bd..5d48d0e0c1 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -849,7 +849,7 @@ uint32_t CommandStreamReceiverHw::blitBuffer(const BlitPropertiesCont if (blitProperties.outputTimestampPacket) { auto timestampPacketGpuAddress = blitProperties.outputTimestampPacket->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); - EncodeMiFlushDW::programMiFlushDw(commandStream, timestampPacketGpuAddress, 0); + EncodeMiFlushDW::programMiFlushDw(commandStream, timestampPacketGpuAddress, 0, true, true); makeResident(*blitProperties.outputTimestampPacket->getBaseGraphicsAllocation()); } @@ -861,7 +861,7 @@ uint32_t CommandStreamReceiverHw::blitBuffer(const BlitPropertiesCont MemorySynchronizationCommands::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo()); - EncodeMiFlushDW::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), newTaskCount); + EncodeMiFlushDW::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), newTaskCount, false, true); MemorySynchronizationCommands::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), peekHwInfo()); diff --git a/shared/test/unit_test/command_container/command_encoder_tests.cpp b/shared/test/unit_test/command_container/command_encoder_tests.cpp index f86bbefe64..a74a9e18dd 100644 --- a/shared/test/unit_test/command_container/command_encoder_tests.cpp +++ b/shared/test/unit_test/command_container/command_encoder_tests.cpp @@ -35,7 +35,7 @@ HWTEST_F(CommandEncoderTests, givenImmDataWriteWhenProgrammingMiFlushDwThenSetAl uint64_t gpuAddress = 0x1230000; uint64_t immData = 456; - EncodeMiFlushDW::programMiFlushDw(linearStream, gpuAddress, immData); + EncodeMiFlushDW::programMiFlushDw(linearStream, gpuAddress, immData, false, true); auto miFlushDwCmd = reinterpret_cast(buffer); unsigned int sizeMultiplier = 1; diff --git a/shared/test/unit_test/encoders/CMakeLists.txt b/shared/test/unit_test/encoders/CMakeLists.txt index bcb6480e4f..68721b1cce 100644 --- a/shared/test/unit_test/encoders/CMakeLists.txt +++ b/shared/test/unit_test/encoders/CMakeLists.txt @@ -11,6 +11,7 @@ set(NEO_CORE_ENCODERS_TESTS ${CMAKE_CURRENT_SOURCE_DIR}/test_encode_dispatch_kernel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_encode_math.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_encode_media_interface_descriptor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_encode_mi_flush_dw.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_encode_semaphore.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_encode_set_mmio.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_encode_states.cpp diff --git a/shared/test/unit_test/encoders/test_encode_mi_flush_dw.cpp b/shared/test/unit_test/encoders/test_encode_mi_flush_dw.cpp new file mode 100644 index 0000000000..b03325edb9 --- /dev/null +++ b/shared/test/unit_test/encoders/test_encode_mi_flush_dw.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2017-2020 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_container/command_encoder.h" +#include "shared/source/command_stream/linear_stream.h" +#include "shared/source/helpers/blit_commands_helper.h" +#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h" + +#include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" +#include "test.h" + +using namespace NEO; + +using EncodeMiFlushDWTest = testing::Test; + +HWTEST_F(EncodeMiFlushDWTest, GivenLinearStreamWhenCllaedEncodeWithNoPostSyncThenPostSyncNotWriteIsSet) { + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + uint32_t pCmdBuffer[1024]; + MockGraphicsAllocation gfxAllocation((void *)pCmdBuffer, sizeof(pCmdBuffer)); + LinearStream stream(&gfxAllocation); + + EncodeMiFlushDW::programMiFlushDw(stream, 0, 0, false, false); + + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, stream.getCpuBase(), stream.getUsed()); + + auto itor = commands.begin(); + itor = find(itor, commands.end()); + ASSERT_NE(itor, commands.end()); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_NO_WRITE); +} + +HWTEST_F(EncodeMiFlushDWTest, GivenLinearStreamWhenCllaedEncodeWithPostSyncDataThenPostSyncDataIsSet) { + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + uint32_t pCmdBuffer[1024]; + MockGraphicsAllocation gfxAllocation((void *)pCmdBuffer, sizeof(pCmdBuffer)); + LinearStream stream(&gfxAllocation); + + uint64_t address = 0x1000; + uint64_t data = 0x4321; + + EncodeMiFlushDW::programMiFlushDw(stream, address, data, false, true); + + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, stream.getCpuBase(), stream.getUsed()); + + auto itor = commands.begin(); + itor = find(itor, commands.end()); + ASSERT_NE(itor, commands.end()); + bool miFlushWithPostSyncFound = false; + for (; itor != commands.end(); itor++) { + auto cmd = genCmdCast(*itor); + if (cmd->getPostSyncOperation() != MI_FLUSH_DW::POST_SYNC_OPERATION_NO_WRITE) { + EXPECT_EQ(cmd->getDestinationAddress(), address); + EXPECT_EQ(cmd->getImmediateData(), data); + miFlushWithPostSyncFound = true; + break; + } + } + EXPECT_TRUE(miFlushWithPostSyncFound); +} + +HWTEST_F(EncodeMiFlushDWTest, GivenLinearStreamWhenCllaedEncodeWithTimestampFaslseThenPostSyncDataTypeIsSet) { + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + uint32_t pCmdBuffer[1024]; + MockGraphicsAllocation gfxAllocation((void *)pCmdBuffer, sizeof(pCmdBuffer)); + LinearStream stream(&gfxAllocation); + + uint64_t address = 0x1000; + uint64_t data = 0x4321; + + EncodeMiFlushDW::programMiFlushDw(stream, address, data, false, true); + + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, stream.getCpuBase(), stream.getUsed()); + + auto itor = commands.begin(); + itor = find(itor, commands.end()); + ASSERT_NE(itor, commands.end()); + bool miFlushWithPostSyncFound = false; + for (; itor != commands.end(); itor++) { + auto cmd = genCmdCast(*itor); + if (cmd->getPostSyncOperation() != MI_FLUSH_DW::POST_SYNC_OPERATION_NO_WRITE) { + EXPECT_EQ(cmd->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD); + miFlushWithPostSyncFound = true; + break; + } + } + EXPECT_TRUE(miFlushWithPostSyncFound); +} + +HWTEST_F(EncodeMiFlushDWTest, GivenLinearStreamWhenCllaedEncodeWithTimestampTrueThenPostSyncDataTypeIsSet) { + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + uint32_t pCmdBuffer[1024]; + MockGraphicsAllocation gfxAllocation((void *)pCmdBuffer, sizeof(pCmdBuffer)); + LinearStream stream(&gfxAllocation); + + uint64_t address = 0x1000; + uint64_t data = 0x4321; + + EncodeMiFlushDW::programMiFlushDw(stream, address, data, true, true); + + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, stream.getCpuBase(), stream.getUsed()); + + auto itor = commands.begin(); + itor = find(itor, commands.end()); + ASSERT_NE(itor, commands.end()); + bool miFlushWithPostSyncFound = false; + for (; itor != commands.end(); itor++) { + auto cmd = genCmdCast(*itor); + if (cmd->getPostSyncOperation() != MI_FLUSH_DW::POST_SYNC_OPERATION_NO_WRITE) { + EXPECT_EQ(cmd->getPostSyncOperation(), MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_TIMESTAMP_REGISTER); + miFlushWithPostSyncFound = true; + break; + } + } + EXPECT_TRUE(miFlushWithPostSyncFound); +} \ No newline at end of file diff --git a/shared/test/unit_test/helpers/CMakeLists.txt b/shared/test/unit_test/helpers/CMakeLists.txt index 79eecab92f..4b40170cf7 100644 --- a/shared/test/unit_test/helpers/CMakeLists.txt +++ b/shared/test/unit_test/helpers/CMakeLists.txt @@ -5,6 +5,7 @@ # set(NEO_CORE_HELPERS_TESTS + ${CMAKE_CURRENT_SOURCE_DIR}/blit_commands_helper_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt ${CMAKE_CURRENT_SOURCE_DIR}/debug_manager_state_restore.h ${CMAKE_CURRENT_SOURCE_DIR}/default_hw_info.h diff --git a/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp b/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp new file mode 100644 index 0000000000..3cac3fda22 --- /dev/null +++ b/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2017-2020 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/blit_commands_helper.h" + +#include "opencl/test/unit_test/mocks/mock_graphics_allocation.h" + +#include "gtest/gtest.h" + +using namespace NEO; + +TEST(BlitCommandsHelperTest, GivenBufferParamsWhenConstructingPropertiesForBufferRegionsThenPropertiesCreatedCorrectly) { + uint32_t src[] = {1, 2, 3, 4}; + uint32_t dst[] = {4, 3, 2, 1}; + uint64_t srcGpuAddr = 0x12345; + uint64_t dstGpuAddr = 0x54321; + std::unique_ptr srcAlloc(new MockGraphicsAllocation(src, srcGpuAddr, sizeof(src))); + std::unique_ptr dstAlloc(new MockGraphicsAllocation(dst, dstGpuAddr, sizeof(dst))); + Vec3 srcOffsets{1, 2, 3}; + Vec3 dstOffsets{3, 2, 1}; + Vec3 copySize{2, 2, 2}; + + size_t srcRowPitch = 2; + size_t srcSlicePitch = 3; + + size_t dstRowPitch = 2; + size_t dstSlicePitch = 3; + + auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstAlloc.get(), srcAlloc.get(), + dstOffsets, srcOffsets, copySize, srcRowPitch, srcSlicePitch, + dstRowPitch, dstSlicePitch); + + EXPECT_EQ(blitProperties.blitDirection, BlitterConstants::BlitDirection::BufferToBuffer); + EXPECT_EQ(blitProperties.dstAllocation, dstAlloc.get()); + EXPECT_EQ(blitProperties.srcAllocation, srcAlloc.get()); + EXPECT_EQ(blitProperties.dstGpuAddress, dstGpuAddr); + EXPECT_EQ(blitProperties.srcGpuAddress, srcGpuAddr); + EXPECT_EQ(blitProperties.copySize, copySize); + EXPECT_EQ(blitProperties.dstOffset, dstOffsets); + EXPECT_EQ(blitProperties.srcOffset, srcOffsets); + EXPECT_EQ(blitProperties.dstRowPitch, dstRowPitch); + EXPECT_EQ(blitProperties.dstSlicePitch, dstSlicePitch); + EXPECT_EQ(blitProperties.srcRowPitch, srcRowPitch); + EXPECT_EQ(blitProperties.srcSlicePitch, srcSlicePitch); +} + +TEST(BlitCommandsHelperTest, GivenCopySizeYAndZEqual0WhenConstructingPropertiesForBufferRegionsThenCopyZAndZEqual1) { + uint32_t src[] = {1, 2, 3, 4}; + uint32_t dst[] = {4, 3, 2, 1}; + uint64_t srcGpuAddr = 0x12345; + uint64_t dstGpuAddr = 0x54321; + std::unique_ptr srcAlloc(new MockGraphicsAllocation(src, srcGpuAddr, sizeof(src))); + std::unique_ptr dstAlloc(new MockGraphicsAllocation(dst, dstGpuAddr, sizeof(dst))); + Vec3 srcOffsets{1, 2, 3}; + Vec3 dstOffsets{3, 2, 1}; + Vec3 copySize{2, 0, 0}; + + size_t srcRowPitch = 2; + size_t srcSlicePitch = 3; + + size_t dstRowPitch = 2; + size_t dstSlicePitch = 3; + + auto blitProperties = NEO::BlitProperties::constructPropertiesForCopyBuffer(dstAlloc.get(), srcAlloc.get(), + dstOffsets, srcOffsets, copySize, srcRowPitch, srcSlicePitch, + dstRowPitch, dstSlicePitch); + Vec3 expectedSize{copySize.x, 1, 1}; + EXPECT_EQ(blitProperties.copySize, expectedSize); +} \ No newline at end of file