From fc4a1d608d189f5ea551b7ef58dd3cfa5508a3ad Mon Sep 17 00:00:00 2001 From: Vinod Tipparaju Date: Mon, 8 Mar 2021 17:00:22 +0530 Subject: [PATCH] Refactor immediate command list with kernel operations to use flushTask(). This is applicable for submissions with sync & async command queue modes. Related-To: LOCI-1988 Signed-off-by: Vinod Tipparaju --- level_zero/core/source/cmdlist/cmdlist.cpp | 1 + level_zero/core/source/cmdlist/cmdlist.h | 25 +- level_zero/core/source/cmdlist/cmdlist_hw.h | 4 - level_zero/core/source/cmdlist/cmdlist_hw.inl | 12 +- .../core/source/cmdlist/cmdlist_hw_base.inl | 4 +- .../source/cmdlist/cmdlist_hw_immediate.h | 24 +- .../source/cmdlist/cmdlist_hw_immediate.inl | 343 +++++++++--- .../core/source/cmdlist/cmdlist_imp.cpp | 12 +- level_zero/core/source/cmdlist/cmdlist_imp.h | 1 + level_zero/core/source/event/event.h | 1 - level_zero/core/source/event/event_impl.inl | 10 - .../core/test/unit_tests/mocks/mock_cmdlist.h | 3 - .../sources/cmdlist/test_cmdlist_1.cpp | 54 +- .../sources/cmdlist/test_cmdlist_2.cpp | 72 +++ .../sources/cmdlist/test_cmdlist_3.cpp | 300 +++++++--- .../sources/cmdlist/test_cmdlist_4.cpp | 134 +++-- .../sources/cmdlist/test_cmdlist_5.cpp | 8 - .../test_cmdlist_append_launch_kernel.cpp | 99 ++-- .../sources/debugger/test_l0_debugger.cpp | 521 +++++++++++++++++- .../test/unit_test/test_files/igdrcl.config | 1 + .../source/command_container/cmdcontainer.cpp | 9 +- .../source/command_container/cmdcontainer.h | 5 + .../debug_settings/debug_variables_base.inl | 1 + 23 files changed, 1278 insertions(+), 366 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist.cpp b/level_zero/core/source/cmdlist/cmdlist.cpp index e23ec369d9..263c75aeac 100644 --- a/level_zero/core/source/cmdlist/cmdlist.cpp +++ b/level_zero/core/source/cmdlist/cmdlist.cpp @@ -21,6 +21,7 @@ CommandList::~CommandList() { removeHostPtrAllocations(); printfFunctionContainer.clear(); } + void CommandList::storePrintfFunction(Kernel *kernel) { auto it 
= std::find(this->printfFunctionContainer.begin(), this->printfFunctionContainer.end(), kernel); diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 66f72e33e8..46a3429106 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -10,6 +10,7 @@ #include "shared/source/command_container/cmdcontainer.h" #include "shared/source/command_stream/preemption_mode.h" #include "shared/source/command_stream/stream_properties.h" +#include "shared/source/command_stream/thread_arbitration_policy.h" #include "level_zero/core/source/cmdqueue/cmdqueue.h" #include "level_zero/core/source/device/device.h" @@ -167,10 +168,22 @@ struct CommandList : _ze_command_list_handle_t { commandListPerThreadScratchSize = size; } + uint32_t getCommandListSLMEnable() const { + return commandListSLMEnabled; + } + + void setCommandListSLMEnable(bool isSLMEnabled) { + commandListSLMEnabled = isSLMEnabled; + } + NEO::PreemptionMode getCommandListPreemptionMode() const { return commandListPreemptionMode; } + uint32_t getThreadArbitrationPolicy() const { + return threadArbitrationPolicy; + } + UnifiedMemoryControls getUnifiedMemoryControls() const { return unifiedMemoryControls; } @@ -201,6 +214,7 @@ struct CommandList : _ze_command_list_handle_t { }; CommandQueue *cmdQImmediate = nullptr; + NEO::CommandStreamReceiver *csr = nullptr; uint32_t cmdListType = CommandListType::TYPE_REGULAR; Device *device = nullptr; std::vector printfFunctionContainer; @@ -214,8 +228,6 @@ struct CommandList : _ze_command_list_handle_t { return hostPtrMap; }; - virtual ze_result_t setSyncModeQueue(bool syncMode) = 0; - const NEO::StreamProperties &getRequiredStreamState() { return requiredStreamState; } @@ -226,10 +238,15 @@ struct CommandList : _ze_command_list_handle_t { return commandsToPatch; } - protected: - std::map hostPtrMap; + bool isSyncModeQueue = false; + bool commandListSLMEnabled = false; uint32_t 
commandListPerThreadScratchSize = 0u; NEO::PreemptionMode commandListPreemptionMode = NEO::PreemptionMode::Initial; + uint32_t threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobin; + bool isFlushTaskSubmissionEnabled = false; + + protected: + std::map hostPtrMap; NEO::EngineGroupType engineGroupType; ze_command_list_flags_t flags = 0u; UnifiedMemoryControls unifiedMemoryControls; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 419ea1b8bb..4aac1395a2 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -157,10 +157,6 @@ struct CommandListCoreFamily : CommandListImp { ze_result_t executeCommandListImmediate(bool performMigration) override; size_t getReserveSshSize(); - ze_result_t setSyncModeQueue(bool syncMode) override { - return ZE_RESULT_SUCCESS; - } - protected: MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, uint64_t dstOffset, void *srcPtr, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index b59df4eb62..ef472aafff 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -63,7 +63,7 @@ template void CommandListCoreFamily::programThreadArbitrationPolicy(Device *device) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; auto &hwHelper = NEO::HwHelper::get(device->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily); - uint32_t threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); + threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); if (NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) { threadArbitrationPolicy = static_cast(NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get()); } @@ -87,6 +87,7 @@ ze_result_t CommandListCoreFamily::reset() { finalStreamState = 
requiredStreamState; containsAnyKernel = false; clearCommandsToPatch(); + commandListSLMEnabled = false; if (!isCopyOnly()) { if (!NEO::ApiSpecificConfig::getBindlessConfiguration()) { @@ -108,13 +109,20 @@ ze_result_t CommandListCoreFamily::initialize(Device *device, NEO this->engineGroupType = engineGroupType; this->flags = flags; + if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) { + this->isFlushTaskSubmissionEnabled = NEO::DebugManager.flags.EnableFlushTaskSubmission.get(); + commandContainer.setFlushTaskUsedForImmediate(this->isFlushTaskSubmissionEnabled); + } + commandContainer.setReservedSshSize(getReserveSshSize()); auto returnValue = commandContainer.initialize(static_cast(device)->neoDevice); ze_result_t returnType = parseErrorCode(returnValue); if (returnType == ZE_RESULT_SUCCESS) { if (!isCopyOnly()) { if (!NEO::ApiSpecificConfig::getBindlessConfiguration()) { - programStateBaseAddress(commandContainer, false); + if (!this->isFlushTaskSubmissionEnabled) { + programStateBaseAddress(commandContainer, false); + } } commandContainer.setDirtyStateForAllHeaps(false); programThreadArbitrationPolicy(device); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index 92f4d7162f..ca478eddf7 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -46,9 +46,11 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z const auto functionImmutableData = kernel->getImmutableData(); auto perThreadScratchSize = std::max(this->getCommandListPerThreadScratchSize(), kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]); - this->setCommandListPerThreadScratchSize(perThreadScratchSize); + auto slmEnable = (kernel->getImmutableData()->getDescriptor().kernelAttributes.slmInlineSize > 0); + this->setCommandListSLMEnable(slmEnable); + auto kernelPreemptionMode = 
obtainFunctionPreemptionMode(kernel); commandListPreemptionMode = std::min(commandListPreemptionMode, kernelPreemptionMode); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index 780726d626..9ebd8ee0e1 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -13,6 +13,7 @@ namespace L0 { struct EventPool; struct Event; +constexpr size_t maxImmediateCommandSize = 4 * MemoryConstants::kiloByte; template struct CommandListCoreFamilyImmediate : public CommandListCoreFamily { @@ -90,13 +91,26 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index edf2e533da..4885b028d7 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -7,32 +7,122 @@ #pragma once +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/helpers/hw_info.h" + #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" -#include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" namespace L0 { +template +void CommandListCoreFamilyImmediate::checkAvailableSpace() { + if (this->commandContainer.getCommandStream()->getAvailableSpace() < maxImmediateCommandSize) { + this->commandContainer.allocateNextCommandBuffer(); + cmdListBBEndOffset = 0; + } +} + +template +ze_result_t CommandListCoreFamilyImmediate::executeCommandListImmediateWithFlushTask(bool performMigration) { + + NEO::DispatchFlags dispatchFlags( + {}, //csrDependencies + nullptr, //barrierTimestampPacketNodes + {}, //pipelineSelectArgs + nullptr, //flushStampReference + NEO::QueueThrottle::MEDIUM, //throttle + this->getCommandListPreemptionMode(), //preemptionMode + this->commandContainer.lastSentNumGrfRequired, //numGrfRequired + 
NEO::L3CachingSettings::l3CacheOn, //l3CacheSettings + this->getThreadArbitrationPolicy(), //threadArbitrationPolicy + NEO::AdditionalKernelExecInfo::NotApplicable, //additionalKernelExecInfo + NEO::KernelExecutionType::NotApplicable, //kernelExecutionType + NEO::MemoryCompressionState::NotApplicable, //memoryCompressionState + NEO::QueueSliceCount::defaultSliceCount, //sliceCount + this->isSyncModeQueue, //blocking + this->isSyncModeQueue, //dcFlush + this->getCommandListSLMEnable(), //useSLM + this->isSyncModeQueue, //guardCommandBufferWithPipeControl + false, //GSBA32BitRequired + false, //requiresCoherency + false, //lowPriority + true, //implicitFlush + this->csr->isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed + false, //epilogueRequired + false, //usePerDssBackedBuffer + false, //useSingleSubdevice + false, //useGlobalAtomics + this->device->getNEODevice()->getNumAvailableDevices() //numDevicesInContext + ); + + this->commandContainer.removeDuplicatesFromResidencyContainer(); + + auto commandStream = this->commandContainer.getCommandStream(); + size_t commandStreamStart = cmdListBBEndOffset; + + auto lockCSR = this->csr->obtainUniqueOwnership(); + + this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadScratchSize()); + + auto completionStamp = this->csr->flushTask( + *commandStream, + commandStreamStart, + *(this->commandContainer.getIndirectHeap(NEO::IndirectHeap::DYNAMIC_STATE)), + *(this->commandContainer.getIndirectHeap(NEO::IndirectHeap::INDIRECT_OBJECT)), + *(this->commandContainer.getIndirectHeap(NEO::IndirectHeap::SURFACE_STATE)), + this->csr->peekTaskLevel(), + dispatchFlags, + *(this->device->getNEODevice())); + + if (this->isSyncModeQueue) { + auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; + this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, completionStamp.taskCount); + this->removeHostPtrAllocations(); + } + + cmdListBBEndOffset = 
commandStream->getUsed(); + + this->commandContainer.getResidencyContainer().clear(); + + return ZE_RESULT_SUCCESS; +} + template ze_result_t CommandListCoreFamilyImmediate::appendLaunchKernel( ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, - ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { + ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { + if (this->isFlushTaskSubmissionEnabled) { + checkAvailableSpace(); + } auto ret = CommandListCoreFamily::appendLaunchKernel(hKernel, pThreadGroupDimensions, - hEvent, numWaitEvents, phWaitEvents); + hSignalEvent, numWaitEvents, phWaitEvents); if (ret == ZE_RESULT_SUCCESS) { - executeCommandListImmediate(true); + if (this->isFlushTaskSubmissionEnabled) { + executeCommandListImmediateWithFlushTask(true); + } else { + executeCommandListImmediate(true); + } } return ret; } + template ze_result_t CommandListCoreFamilyImmediate::appendLaunchKernelIndirect( ze_kernel_handle_t hKernel, const ze_group_count_t *pDispatchArgumentsBuffer, - ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { + ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { + if (this->isFlushTaskSubmissionEnabled) { + checkAvailableSpace(); + } auto ret = CommandListCoreFamily::appendLaunchKernelIndirect(hKernel, pDispatchArgumentsBuffer, - hEvent, numWaitEvents, phWaitEvents); + hSignalEvent, numWaitEvents, phWaitEvents); if (ret == ZE_RESULT_SUCCESS) { - executeCommandListImmediate(true); + if (this->isFlushTaskSubmissionEnabled) { + executeCommandListImmediateWithFlushTask(true); + } else { + executeCommandListImmediate(true); + } } return ret; } @@ -42,6 +132,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendBarrier( ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { + ze_result_t ret = ZE_RESULT_SUCCESS; bool isTimestampEvent = false; for 
(uint32_t i = 0; i < numWaitEvents; i++) { auto event = Event::fromHandle(phWaitEvents[i]); @@ -51,24 +142,34 @@ ze_result_t CommandListCoreFamilyImmediate::appendBarrier( auto signalEvent = Event::fromHandle(hSignalEvent); isTimestampEvent |= signalEvent->isEventTimestampFlagSet(); } - if (isSyncModeQueue || isTimestampEvent) { - auto ret = CommandListCoreFamily::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents); - if (ret == ZE_RESULT_SUCCESS) { - executeCommandListImmediate(true); + + if (isTimestampEvent) { + if (this->isFlushTaskSubmissionEnabled) { + checkAvailableSpace(); + } + ret = CommandListCoreFamily::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents); + if (ret == ZE_RESULT_SUCCESS) { + if (this->isFlushTaskSubmissionEnabled) { + executeCommandListImmediateWithFlushTask(true); + } else { + executeCommandListImmediate(true); + } } - return ret; } else { - auto ret = appendWaitOnEvents(numWaitEvents, phWaitEvents); + ret = CommandListCoreFamilyImmediate::appendWaitOnEvents(numWaitEvents, phWaitEvents); if (!hSignalEvent) { NEO::PipeControlArgs args; - auto cmdQueueImp = static_cast(this->cmdQImmediate); - NEO::CommandStreamReceiver *csr = cmdQueueImp->getCsr(); - csr->flushNonKernelTask(nullptr, 0, 0, args, false, false, false); + this->csr->flushNonKernelTask(nullptr, 0, 0, args, false, false, false); + if (this->isSyncModeQueue) { + this->csr->flushTagUpdate(); + auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; + this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount()); + } } else { - ret = appendSignalEvent(hSignalEvent); + ret = CommandListCoreFamilyImmediate::appendSignalEvent(hSignalEvent); } - return ret; } + return ret; } template @@ -80,13 +181,21 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopy( uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { + if (this->isFlushTaskSubmissionEnabled) { + checkAvailableSpace(); + } auto ret = 
CommandListCoreFamily::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents); if (ret == ZE_RESULT_SUCCESS) { - executeCommandListImmediate(true); + if (this->isFlushTaskSubmissionEnabled) { + executeCommandListImmediateWithFlushTask(true); + } else { + executeCommandListImmediate(true); + } } return ret; } + template ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopyRegion( void *dstPtr, @@ -101,11 +210,18 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopyRegio uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { + if (this->isFlushTaskSubmissionEnabled) { + checkAvailableSpace(); + } auto ret = CommandListCoreFamily::appendMemoryCopyRegion(dstPtr, dstRegion, dstPitch, dstSlicePitch, srcPtr, srcRegion, srcPitch, srcSlicePitch, hSignalEvent, numWaitEvents, phWaitEvents); if (ret == ZE_RESULT_SUCCESS) { - executeCommandListImmediate(true); + if (this->isFlushTaskSubmissionEnabled) { + executeCommandListImmediateWithFlushTask(true); + } else { + executeCommandListImmediate(true); + } } return ret; } @@ -116,55 +232,86 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryFill(void ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { + + if (this->isFlushTaskSubmissionEnabled) { + checkAvailableSpace(); + } auto ret = CommandListCoreFamily::appendMemoryFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents); if (ret == ZE_RESULT_SUCCESS) { - executeCommandListImmediate(true); + if (this->isFlushTaskSubmissionEnabled) { + executeCommandListImmediateWithFlushTask(true); + } else { + executeCommandListImmediate(true); + } } return ret; } template -ze_result_t CommandListCoreFamilyImmediate::appendSignalEvent(ze_event_handle_t hEvent) { - auto event = Event::fromHandle(hEvent); - if (isSyncModeQueue || event->isEventTimestampFlagSet()) { - auto ret = CommandListCoreFamily::appendSignalEvent(hEvent); - if (ret == ZE_RESULT_SUCCESS) { - 
executeCommandListImmediate(true); +ze_result_t CommandListCoreFamilyImmediate::appendSignalEvent(ze_event_handle_t hSignalEvent) { + ze_result_t ret = ZE_RESULT_SUCCESS; + auto event = Event::fromHandle(hSignalEvent); + bool isTimestampEvent = event->isEventTimestampFlagSet(); + + if (isTimestampEvent) { + if (this->isFlushTaskSubmissionEnabled) { + checkAvailableSpace(); + } + ret = CommandListCoreFamily::appendSignalEvent(hSignalEvent); + if (ret == ZE_RESULT_SUCCESS) { + if (this->isFlushTaskSubmissionEnabled) { + executeCommandListImmediateWithFlushTask(true); + } else { + executeCommandListImmediate(true); + } } - return ret; } else { NEO::PipeControlArgs args; args.dcFlushEnable = (!event->signalScope) ? false : true; - auto cmdQueueImp = static_cast(this->cmdQImmediate); - NEO::CommandStreamReceiver *csr = cmdQueueImp->getCsr(); - csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_SIGNALED, args, false, false, false); - event->updateTaskCountEnabled = true; - return ZE_RESULT_SUCCESS; + this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_SIGNALED, args, false, false, false); + if (this->isSyncModeQueue) { + this->csr->flushTagUpdate(); + auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; + this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount()); + } } + return ret; } template -ze_result_t CommandListCoreFamilyImmediate::appendEventReset(ze_event_handle_t hEvent) { - auto event = Event::fromHandle(hEvent); - if (isSyncModeQueue || event->isEventTimestampFlagSet()) { - auto ret = CommandListCoreFamily::appendEventReset(hEvent); - if (ret == ZE_RESULT_SUCCESS) { - executeCommandListImmediate(true); +ze_result_t CommandListCoreFamilyImmediate::appendEventReset(ze_event_handle_t hSignalEvent) { + ze_result_t ret = ZE_RESULT_SUCCESS; + auto event = Event::fromHandle(hSignalEvent); + bool 
isTimestampEvent = event->isEventTimestampFlagSet(); + + if (isTimestampEvent) { + if (this->isFlushTaskSubmissionEnabled) { + checkAvailableSpace(); + } + ret = CommandListCoreFamily::appendEventReset(hSignalEvent); + if (ret == ZE_RESULT_SUCCESS) { + if (this->isFlushTaskSubmissionEnabled) { + executeCommandListImmediateWithFlushTask(true); + } else { + executeCommandListImmediate(true); + } } - return ret; } else { NEO::PipeControlArgs args; args.dcFlushEnable = (!event->signalScope) ? false : true; - auto cmdQueueImp = static_cast(this->cmdQImmediate); - NEO::CommandStreamReceiver *csr = cmdQueueImp->getCsr(); - csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_CLEARED, args, false, false, false); - event->updateTaskCountEnabled = true; - return ZE_RESULT_SUCCESS; + this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_CLEARED, args, false, false, false); + if (this->isSyncModeQueue) { + this->csr->flushTagUpdate(); + auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; + this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount()); + } } + return ret; } template ze_result_t CommandListCoreFamilyImmediate::appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr, size_t size, bool flushHost) { + auto ret = CommandListCoreFamily::appendPageFaultCopy(dstptr, srcptr, size, flushHost); if (ret == ZE_RESULT_SUCCESS) { executeCommandListImmediate(false); @@ -173,47 +320,61 @@ ze_result_t CommandListCoreFamilyImmediate::appendPageFaultCopy(N } template -ze_result_t CommandListCoreFamilyImmediate::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) { +ze_result_t CommandListCoreFamilyImmediate::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phWaitEvents) { + ze_result_t ret = ZE_RESULT_SUCCESS; bool isTimestampEvent = false; + for (uint32_t i = 0; i < 
numEvents; i++) { - auto event = Event::fromHandle(phEvent[i]); + auto event = Event::fromHandle(phWaitEvents[i]); isTimestampEvent |= (event->isEventTimestampFlagSet()) ? true : false; } - if (isSyncModeQueue || isTimestampEvent) { - auto ret = CommandListCoreFamily::appendWaitOnEvents(numEvents, phEvent); - if (ret == ZE_RESULT_SUCCESS) { - executeCommandListImmediate(true); + + if (isTimestampEvent) { + if (this->isFlushTaskSubmissionEnabled) { + checkAvailableSpace(); + } + ret = CommandListCoreFamily::appendWaitOnEvents(numEvents, phWaitEvents); + if (ret == ZE_RESULT_SUCCESS) { + if (this->isFlushTaskSubmissionEnabled) { + executeCommandListImmediateWithFlushTask(true); + } else { + executeCommandListImmediate(true); + } } - return ret; } else { bool dcFlushRequired = false; for (uint32_t i = 0; i < numEvents; i++) { - auto event = Event::fromHandle(phEvent[i]); + auto event = Event::fromHandle(phWaitEvents[i]); dcFlushRequired |= (!event->waitScope) ? false : true; } - auto cmdQueueImp = static_cast(this->cmdQImmediate); - NEO::CommandStreamReceiver *csr = cmdQueueImp->getCsr(); NEO::PipeControlArgs args; args.dcFlushEnable = dcFlushRequired; for (uint32_t i = 0; i < numEvents; i++) { - auto event = Event::fromHandle(phEvent[i]); + auto event = Event::fromHandle(phWaitEvents[i]); bool isStartOfDispatch = (i == 0); bool isEndOfDispatch = (i == numEvents - 1); - csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_CLEARED, args, true, isStartOfDispatch, isEndOfDispatch); - event->updateTaskCountEnabled = true; + this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_CLEARED, args, true, isStartOfDispatch, isEndOfDispatch); } - return ZE_RESULT_SUCCESS; } + return ret; } template ze_result_t CommandListCoreFamilyImmediate::appendWriteGlobalTimestamp( uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t 
*phWaitEvents) { + + if (this->isFlushTaskSubmissionEnabled) { + checkAvailableSpace(); + } auto ret = CommandListCoreFamily::appendWriteGlobalTimestamp(dstptr, hSignalEvent, numWaitEvents, phWaitEvents); if (ret == ZE_RESULT_SUCCESS) { - executeCommandListImmediate(true); + if (this->isFlushTaskSubmissionEnabled) { + executeCommandListImmediateWithFlushTask(true); + } else { + executeCommandListImmediate(true); + } } return ret; } @@ -226,19 +387,61 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopyFromC return CommandListCoreFamilyImmediate::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents); } +template +ze_result_t CommandListCoreFamilyImmediate::appendImageCopy( + ze_image_handle_t dst, ze_image_handle_t src, + ze_event_handle_t hSignalEvent, + uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) { + + return CommandListCoreFamilyImmediate::appendImageCopyRegion(dst, src, nullptr, nullptr, hSignalEvent, + numWaitEvents, phWaitEvents); +} + +template +ze_result_t CommandListCoreFamilyImmediate::appendImageCopyRegion(ze_image_handle_t hDstImage, + ze_image_handle_t hSrcImage, + const ze_image_region_t *pDstRegion, + const ze_image_region_t *pSrcRegion, + ze_event_handle_t hSignalEvent, + uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) { + + if (this->isFlushTaskSubmissionEnabled) { + checkAvailableSpace(); + } + auto ret = CommandListCoreFamily::appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent, + numWaitEvents, phWaitEvents); + if (ret == ZE_RESULT_SUCCESS) { + if (this->isFlushTaskSubmissionEnabled) { + executeCommandListImmediateWithFlushTask(true); + } else { + executeCommandListImmediate(true); + } + } + return ret; +} + template ze_result_t CommandListCoreFamilyImmediate::appendImageCopyFromMemory( ze_image_handle_t hDstImage, const void *srcPtr, const ze_image_region_t *pDstRegion, - ze_event_handle_t hEvent, + ze_event_handle_t hSignalEvent, uint32_t 
numWaitEvents, ze_event_handle_t *phWaitEvents) { - auto ret = CommandListCoreFamily::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hEvent, + if (this->isFlushTaskSubmissionEnabled) { + checkAvailableSpace(); + } + auto ret = CommandListCoreFamily::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hSignalEvent, numWaitEvents, phWaitEvents); if (ret == ZE_RESULT_SUCCESS) { - executeCommandListImmediate(true); + if (this->isFlushTaskSubmissionEnabled) { + executeCommandListImmediateWithFlushTask(true); + } else { + executeCommandListImmediate(true); + } } return ret; } @@ -248,15 +451,23 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyToMemo void *dstPtr, ze_image_handle_t hSrcImage, const ze_image_region_t *pSrcRegion, - ze_event_handle_t hEvent, + ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - auto ret = CommandListCoreFamily::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hEvent, + if (this->isFlushTaskSubmissionEnabled) { + checkAvailableSpace(); + } + auto ret = CommandListCoreFamily::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hSignalEvent, numWaitEvents, phWaitEvents); if (ret == ZE_RESULT_SUCCESS) { - executeCommandListImmediate(true); + if (this->isFlushTaskSubmissionEnabled) { + executeCommandListImmediateWithFlushTask(true); + } else { + executeCommandListImmediate(true); + } } return ret; } + } // namespace L0 diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index 5dafb74449..844c536f98 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -27,6 +27,11 @@ CommandListAllocatorFn commandListFactory[IGFX_MAX_PRODUCT] = {}; CommandListAllocatorFn commandListFactoryImmediate[IGFX_MAX_PRODUCT] = {}; ze_result_t CommandListImp::destroy() { + if (this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) { + this->csr->flushTagUpdate(); + auto 
timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; + this->csr->waitForCompletionWithTimeout(false, timeoutMicroseconds, this->csr->peekTaskCount()); + } delete this; return ZE_RESULT_SUCCESS; } @@ -65,8 +70,6 @@ CommandList *CommandList::create(uint32_t productFamily, Device *device, NEO::En if (returnValue != ZE_RESULT_SUCCESS) { commandList->destroy(); commandList = nullptr; - } else { - commandList->setSyncModeQueue(false); } } @@ -89,6 +92,8 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device if (allocator) { commandList = static_cast((*allocator)(CommandList::commandListimmediateIddsPerBlock)); commandList->internalUsage = internalUsage; + commandList->cmdListType = CommandListType::TYPE_IMMEDIATE; + commandList->isSyncModeQueue = (desc->mode == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS); returnValue = commandList->initialize(device, engineGroupType, desc->flags); if (returnValue != ZE_RESULT_SUCCESS) { commandList->destroy(); @@ -113,9 +118,8 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device } commandList->cmdQImmediate = commandQueue; - commandList->cmdListType = CommandListType::TYPE_IMMEDIATE; + commandList->csr = csr; commandList->commandListPreemptionMode = device->getDevicePreemptionMode(); - commandList->setSyncModeQueue(desc->mode == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS); return commandList; } diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.h b/level_zero/core/source/cmdlist/cmdlist_imp.h index e67b33bdf4..c9046f7252 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.h +++ b/level_zero/core/source/cmdlist/cmdlist_imp.h @@ -8,6 +8,7 @@ #pragma once #include "level_zero/core/source/cmdlist/cmdlist.h" +#include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" #include "level_zero/core/source/device/device.h" namespace L0 { diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index 86c920c0ce..1af0e9da10 100644 --- 
a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -70,7 +70,6 @@ struct Event : _ze_event_handle_t { uint32_t kernelCount = 1u; ze_event_scope_flags_t signalScope = 0u; ze_event_scope_flags_t waitScope = 0u; - bool updateTaskCountEnabled = false; uint64_t globalStartTS; uint64_t globalEndTS; diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 6ad529d187..dab9cc30c6 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -113,11 +113,6 @@ ze_result_t EventImp::queryStatus() { return retVal; } - if (updateTaskCountEnabled) { - this->csr->flushTagUpdate(); - updateTaskCountEnabled = false; - } - return retVal; } @@ -160,11 +155,6 @@ ze_result_t EventImp::hostEventSetValue(uint32_t eventVal) { UNRECOVERABLE_IF(hostAddr == nullptr); memcpy_s(static_cast(hostAddr), sizeof(uint32_t), static_cast(&eventVal), sizeof(uint32_t)); - if (updateTaskCountEnabled) { - this->csr->flushTagUpdate(); - updateTaskCountEnabled = false; - } - NEO::CpuIntrinsics::clFlush(hostAddr); return ZE_RESULT_SUCCESS; diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 0a416176ec..bad76bb26f 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -338,9 +338,6 @@ struct MockCommandList : public CommandList { NEO::EngineGroupType engineGroupType, ze_command_list_flags_t flags)); - ADDMETHOD_NOBASE(setSyncModeQueue, ze_result_t, ZE_RESULT_SUCCESS, - (bool syncMode)); - uint8_t *batchBuffer = nullptr; NEO::GraphicsAllocation *mockAllocation = nullptr; }; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index aafd6abe1e..a7ba6ec484 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ 
b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -300,7 +300,6 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithSyncModeAndAppendSignalEvent ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); commandList->appendSignalEvent(event); - EXPECT_EQ(false, event_object->updateTaskCountEnabled); auto result = event_object->hostSignal(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); @@ -339,8 +338,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithSyncModeAndAppendBarrierThen ASSERT_NE(nullptr, event_object->csr); ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); - commandList->appendBarrier(event, 0, nullptr); - EXPECT_EQ(false, event_object->updateTaskCountEnabled); + commandList->appendBarrier(nullptr, 1, &event); auto result = event_object->hostSignal(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); @@ -348,7 +346,6 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithSyncModeAndAppendBarrierThen EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); commandList->appendBarrier(nullptr, 0, nullptr); - EXPECT_EQ(false, event_object->updateTaskCountEnabled); } TEST_F(CommandListCreate, whenCreatingImmCmdListWithSyncModeAndAppendResetEventThenUpdateTaskCountNeededFlagIsDisabled) { @@ -383,7 +380,6 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithSyncModeAndAppendResetEventT ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); commandList->appendEventReset(event); - EXPECT_EQ(false, event_object->updateTaskCountEnabled); auto result = event_object->hostSignal(); ASSERT_EQ(ZE_RESULT_SUCCESS, result); @@ -423,10 +419,8 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendSignalEven ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); commandList->appendSignalEvent(event); - EXPECT_EQ(true, 
event_object->updateTaskCountEnabled); auto result = event_object->hostSignal(); - EXPECT_EQ(false, event_object->updateTaskCountEnabled); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); @@ -464,16 +458,13 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendBarrierThe ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); commandList->appendBarrier(event, 0, nullptr); - EXPECT_EQ(true, event_object->updateTaskCountEnabled); auto result = event_object->hostSignal(); - EXPECT_EQ(false, event_object->updateTaskCountEnabled); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); commandList->appendBarrier(nullptr, 0, nullptr); - EXPECT_EQ(false, event_object->updateTaskCountEnabled); } TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndCopyEngineAndAppendBarrierThenUpdateTaskCountNeededFlagIsEnabled) { @@ -509,16 +500,13 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndCopyEngineAndApp ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); commandList->appendBarrier(event, 0, nullptr); - EXPECT_EQ(true, event_object->updateTaskCountEnabled); auto result = event_object->hostSignal(); - EXPECT_EQ(false, event_object->updateTaskCountEnabled); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); commandList->appendBarrier(nullptr, 0, nullptr); - EXPECT_EQ(false, event_object->updateTaskCountEnabled); } TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendEventResetThenUpdateTaskCountNeededFlagIsEnabled) { @@ -553,10 +541,8 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendEventReset ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); commandList->appendEventReset(event); - EXPECT_EQ(true, 
event_object->updateTaskCountEnabled); auto result = event_object->hostSignal(); - EXPECT_EQ(false, event_object->updateTaskCountEnabled); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); @@ -937,44 +923,6 @@ HWTEST_F(CommandListCreate, givenCommandListWhenSetBarrierThenPipeControlIsProgr EXPECT_NE(cmdList.end(), itor); } -HWTEST_F(CommandListCreate, givenSyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithHostScopeThenMiFlushAndSemWaitAreAdded) { - using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - - ze_command_queue_desc_t desc = {}; - desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; - ze_result_t returnValue; - std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); - ASSERT_NE(nullptr, commandList); - - EXPECT_EQ(device, commandList->device); - EXPECT_EQ(1u, commandList->cmdListType); - EXPECT_NE(nullptr, commandList->cmdQImmediate); - - auto &commandContainer = commandList->commandContainer; - - ze_event_pool_desc_t eventPoolDesc = {}; - eventPoolDesc.count = 2; - eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - - ze_event_desc_t eventDesc = {}; - eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; - auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); - auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - auto event2 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - ze_event_handle_t events[] = {event->toHandle(), event2->toHandle()}; - - auto used = commandContainer.getCommandStream()->getUsed(); - commandList->appendWaitOnEvents(2, events); - EXPECT_EQ(false, event->updateTaskCountEnabled); - EXPECT_EQ(false, event2->updateTaskCountEnabled); - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( - 
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); - - EXPECT_EQ(used, commandContainer.getCommandStream()->getUsed()); -} - using Platforms = IsAtLeastProduct; HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingBeforeCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, Platforms) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp index 06488d3567..d3689bb38d 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp @@ -235,6 +235,78 @@ HWTEST2_F(CommandListCreate, givenCommandListAnd3DWhbufferenMemoryCopyRegionCall EXPECT_GT(cmdList.appendMemoryCopyKernel3dCalledTimes, 0u); } +HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenAppendingMemoryCopyThenSuccessIsReturned, Platforms) { + const ze_command_queue_desc_t desc = {}; + bool internalEngine = true; + + ze_result_t returnValue; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &desc, + internalEngine, + NEO::EngineGroupType::RenderCompute, + returnValue)); + ASSERT_NE(nullptr, commandList0); + + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); + + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + + auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); +} + +HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenAppendingMemoryCopyWithInvalidEventThenInvalidArgumentErrorIsReturned, Platforms) { + const ze_command_queue_desc_t desc = {}; + bool internalEngine = true; + + ze_result_t returnValue; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + 
device, + &desc, + internalEngine, + NEO::EngineGroupType::RenderCompute, + returnValue)); + ASSERT_NE(nullptr, commandList0); + + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); + + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + + auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 1, nullptr); + ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); +} + +HWTEST2_F(CommandListCreate, givenCommandListAndHostPointersWhenMemoryCopyCalledThenPipeControlWithDcFlushAdded, Platforms) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + ze_result_t result; + std::unique_ptr commandList0(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result)); + ASSERT_NE(nullptr, commandList0); + + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + result = commandList0->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto &commandContainer = commandList0->commandContainer; + GenCmdList genCmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + auto itor = find(genCmdList.begin(), genCmdList.end()); + ASSERT_NE(genCmdList.end(), itor); + PIPE_CONTROL *cmd = nullptr; + while (itor != genCmdList.end()) { + cmd = genCmdCast(*itor); + itor = find(++itor, genCmdList.end()); + } + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(), cmd->getDcFlushEnable()); +} + HWTEST2_F(CommandListCreate, givenCommandListAnd2DWhbufferenMemoryCopyRegionCalledThenCopyKernel2DCalled, Platforms) { MockCommandListHw cmdList; cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u); diff --git 
a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp index 26930dfdc5..8c373e7724 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp @@ -11,6 +11,7 @@ #include "opencl/test/unit_test/mocks/mock_memory_manager.h" #include "test.h" +#include "level_zero/core/source/image/image_hw.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" @@ -727,16 +728,20 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionWithSignalAndIn } HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenMemoryCopyRegionWithSignalAndWaitEventsUsingRenderEngineThenSuccessIsReturned, Platforms) { - Mock cmdQueue; + const ze_command_queue_desc_t desc = {}; + bool internalEngine = true; ze_result_t result = ZE_RESULT_SUCCESS; - auto commandList = std::make_unique>>(); - ASSERT_NE(nullptr, commandList); - ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, ret); - commandList->device = device; - commandList->cmdQImmediate = &cmdQueue; - commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &desc, + internalEngine, + NEO::EngineGroupType::RenderCompute, + result)); + ASSERT_NE(nullptr, commandList0); + + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); void *src_buffer = reinterpret_cast(0x1234); void *dst_buffer = reinterpret_cast(0x2345); @@ -758,28 +763,74 @@ HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenMemoryCopyRegionWithSi auto event1 = 
std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); - EXPECT_CALL(cmdQueue, executeCommandLists).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); - EXPECT_CALL(cmdQueue, synchronize).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); + ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U}; + ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U}; + result = commandList0->appendMemoryCopyRegion(dst_buffer, &dr, width, 0, + src_buffer, &sr, width, 0, events[0], 1u, &events[1]); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); +} + +TEST_F(CommandListCreate, givenImmediateCommandListWhenMemoryCopyRegionWithSignalAndWaitEventsUsingRenderEngineInALoopThenSuccessIsReturned) { + const ze_command_queue_desc_t desc = {}; + bool internalEngine = true; + + ze_result_t ret = ZE_RESULT_SUCCESS; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &desc, + internalEngine, + NEO::EngineGroupType::RenderCompute, + ret)); + ASSERT_NE(nullptr, commandList0); + + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); + + void *src_buffer = reinterpret_cast(0x1234); + void *dst_buffer = reinterpret_cast(0x2345); + uint32_t width = 16; + uint32_t height = 16; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 2; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); + + std::vector events; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + events.push_back(event.get()); + eventDesc.index = 1; + auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + events.push_back(event1.get()); ze_copy_region_t sr = {0U, 0U, 0U, width, height, 
0U}; ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U}; - result = commandList->appendMemoryCopyRegion(dst_buffer, &dr, width, 0, - src_buffer, &sr, width, 0, events[0], 1u, &events[1]); - EXPECT_EQ(ZE_RESULT_SUCCESS, result); - commandList->cmdQImmediate = nullptr; + + for (auto i = 0; i < 2000; i++) { + ret = commandList0->appendMemoryCopyRegion(dst_buffer, &dr, width, 0, + src_buffer, &sr, width, 0, events[0], 1u, &events[1]); + } + EXPECT_EQ(ZE_RESULT_SUCCESS, ret); } HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenMemoryCopyRegionWithSignalAndWaitEventsUsingCopyEngineThenSuccessIsReturned, Platforms) { - Mock cmdQueue; + const ze_command_queue_desc_t desc = {}; + bool internalEngine = true; - ze_result_t result = ZE_RESULT_SUCCESS; - auto commandList = std::make_unique>>(); - ASSERT_NE(nullptr, commandList); - ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, ret); - commandList->device = device; - commandList->cmdQImmediate = &cmdQueue; - commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; + ze_result_t returnValue; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &desc, + internalEngine, + NEO::EngineGroupType::Copy, + returnValue)); + ASSERT_NE(nullptr, commandList0); + + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); void *src_buffer = reinterpret_cast(0x1234); void *dst_buffer = reinterpret_cast(0x2345); @@ -801,28 +852,150 @@ HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenMemoryCopyRegionWithSi auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); - EXPECT_CALL(cmdQueue, executeCommandLists).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); - EXPECT_CALL(cmdQueue, 
synchronize).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); - ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U}; ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U}; - result = commandList->appendMemoryCopyRegion(dst_buffer, &dr, width, 0, - src_buffer, &sr, width, 0, events[0], 1u, &events[1]); + auto result = commandList0->appendMemoryCopyRegion(dst_buffer, &dr, width, 0, + src_buffer, &sr, width, 0, events[0], 1u, &events[1]); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - commandList->cmdQImmediate = nullptr; +} + +using ImageSupported = IsAtLeastProduct; + +HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenCopyRegionFromImageToImageUsingRenderThenSuccessIsReturned, ImageSupported) { + const ze_command_queue_desc_t queueDesc = {}; + bool internalEngine = true; + + ze_result_t returnValue; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &queueDesc, + internalEngine, + NEO::EngineGroupType::Copy, + returnValue)); + ASSERT_NE(nullptr, commandList0); + + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); + + ze_image_desc_t desc = {}; + desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; + desc.type = ZE_IMAGE_TYPE_3D; + desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; + desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; + desc.width = 11; + desc.height = 13; + desc.depth = 17; + + desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; + desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; + desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1; + desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; + auto imageHWSrc = std::make_unique>>(); + auto imageHWDst = std::make_unique>>(); + imageHWSrc->initialize(device, &desc); + imageHWDst->initialize(device, &desc); + + ze_image_region_t srcRegion = {4, 4, 4, 2, 2, 2}; + ze_image_region_t dstRegion = {4, 4, 4, 2, 2, 2}; + returnValue = commandList0->appendImageCopyRegion(imageHWDst->toHandle(), 
imageHWSrc->toHandle(), &dstRegion, &srcRegion, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); +} + +HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenCopyRegionFromImageToImageUsingCopyWintInvalidRegionArguementsThenErrorIsReturned, ImageSupported) { + const ze_command_queue_desc_t queueDesc = {}; + bool internalEngine = true; + + ze_result_t returnValue; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &queueDesc, + internalEngine, + NEO::EngineGroupType::Copy, + returnValue)); + ASSERT_NE(nullptr, commandList0); + + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); + + ze_image_desc_t desc = {}; + desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; + desc.type = ZE_IMAGE_TYPE_3D; + desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; + desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; + desc.width = 11; + desc.height = 13; + desc.depth = 17; + + desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; + desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; + desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1; + desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; + + auto imageHWSrc = std::make_unique>>(); + auto imageHWDst = std::make_unique>>(); + imageHWSrc->initialize(device, &desc); + imageHWDst->initialize(device, &desc); + + ze_image_region_t srcRegion = {4, 4, 4, 2, 2, 2}; + ze_image_region_t dstRegion = {2, 2, 2, 4, 4, 4}; + returnValue = commandList0->appendImageCopyRegion(imageHWDst->toHandle(), imageHWSrc->toHandle(), &dstRegion, &srcRegion, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, returnValue); +} + +HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenCopyFromImageToImageUsingRenderThenSuccessIsReturned, ImageSupported) { + const ze_command_queue_desc_t queueDesc = {}; + bool internalEngine = true; + + ze_result_t returnValue; + std::unique_ptr 
commandList0(CommandList::createImmediate(productFamily, + device, + &queueDesc, + internalEngine, + NEO::EngineGroupType::Copy, + returnValue)); + ASSERT_NE(nullptr, commandList0); + + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); + + ze_image_desc_t desc = {}; + desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; + desc.type = ZE_IMAGE_TYPE_3D; + desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; + desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; + desc.width = 11; + desc.height = 13; + desc.depth = 17; + + desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; + desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; + desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1; + desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; + + auto imageHWSrc = std::make_unique>>(); + auto imageHWDst = std::make_unique>>(); + imageHWSrc->initialize(device, &desc); + imageHWDst->initialize(device, &desc); + + returnValue = commandList0->appendImageCopy(imageHWDst->toHandle(), imageHWSrc->toHandle(), nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); } HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenMemoryCopyRegionWithSignalAndInvalidWaitHandleUsingCopyEngineThenErrorIsReturned, Platforms) { - Mock cmdQueue; + const ze_command_queue_desc_t desc = {}; + bool internalEngine = true; ze_result_t result = ZE_RESULT_SUCCESS; - auto commandList = std::make_unique>>(); - ASSERT_NE(nullptr, commandList); - ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, ret); - commandList->device = device; - commandList->cmdQImmediate = &cmdQueue; - commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &desc, + internalEngine, + NEO::EngineGroupType::Copy, + result)); + ASSERT_NE(nullptr, commandList0); + + CommandQueueImp *cmdQueue = 
reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); void *src_buffer = reinterpret_cast(0x1234); void *dst_buffer = reinterpret_cast(0x2345); @@ -846,10 +1019,9 @@ HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenMemoryCopyRegionWithSi ze_copy_region_t sr = {0U, 0U, 0U, width, height, 0U}; ze_copy_region_t dr = {0U, 0U, 0U, width, height, 0U}; - result = commandList->appendMemoryCopyRegion(dst_buffer, &dr, width, 0, - src_buffer, &sr, width, 0, events[0], 1u, nullptr); + result = commandList0->appendMemoryCopyRegion(dst_buffer, &dr, width, 0, + src_buffer, &sr, width, 0, events[0], 1u, nullptr); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); - commandList->cmdQImmediate = nullptr; } TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendSignalEventWithTimestampThenUpdateTaskCountNeededFlagIsDisabled) { @@ -884,10 +1056,8 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendSignalEven ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); commandList->appendSignalEvent(event); - EXPECT_EQ(false, event_object->updateTaskCountEnabled); auto result = event_object->hostSignal(); - EXPECT_EQ(false, event_object->updateTaskCountEnabled); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); @@ -925,16 +1095,13 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendBarrierThe ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); commandList->appendBarrier(event, 0, nullptr); - EXPECT_EQ(false, event_object->updateTaskCountEnabled); auto result = event_object->hostSignal(); - EXPECT_EQ(false, event_object->updateTaskCountEnabled); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); commandList->appendBarrier(nullptr, 0, nullptr); - 
EXPECT_EQ(false, event_object->updateTaskCountEnabled); } TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendEventResetThenUpdateTaskCountNeededFlagIsDisabled) { @@ -969,50 +1136,13 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendEventReset ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); commandList->appendEventReset(event); - EXPECT_EQ(false, event_object->updateTaskCountEnabled); auto result = event_object->hostSignal(); - EXPECT_EQ(false, event_object->updateTaskCountEnabled); ASSERT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); } -HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithHostScopeAndTimeStampThenTaskCountNeededFlagIsDisabled) { - ze_command_queue_desc_t desc = {}; - desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; - ze_result_t returnValue; - std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); - ASSERT_NE(nullptr, commandList); - - EXPECT_EQ(device, commandList->device); - EXPECT_EQ(1u, commandList->cmdListType); - EXPECT_NE(nullptr, commandList->cmdQImmediate); - - auto &commandContainer = commandList->commandContainer; - - ze_event_pool_desc_t eventPoolDesc = {}; - eventPoolDesc.count = 2; - eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; - - ze_event_desc_t eventDesc = {}; - eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; - auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); - auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - auto event2 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - ze_event_handle_t events[] = {event->toHandle(), event2->toHandle()}; - - auto used = commandContainer.getCommandStream()->getUsed(); - 
commandList->appendWaitOnEvents(2, events); - EXPECT_EQ(false, event->updateTaskCountEnabled); - EXPECT_EQ(false, event2->updateTaskCountEnabled); - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( - cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); - - EXPECT_EQ(used, commandContainer.getCommandStream()->getUsed()); -} - TEST_F(CommandListCreate, givenQueueDescriptionwhenCreatingImmediateCommandListForCopyEnigneThenItHasImmediateCommandQueueCreated) { auto engines = neoDevice->getEngineGroups(); uint32_t numaAvailableEngineGroups = 0; @@ -1052,18 +1182,12 @@ TEST_F(CommandListCreate, givenQueueDescriptionwhenCreatingImmediateCommandListF commandList->appendBarrier(nullptr, 0, nullptr); commandList->appendBarrier(event->toHandle(), 2, events); - EXPECT_EQ(true, event->updateTaskCountEnabled); - EXPECT_EQ(true, event1->updateTaskCountEnabled); - EXPECT_EQ(true, event2->updateTaskCountEnabled); auto result = event->hostSignal(); - EXPECT_EQ(false, event->updateTaskCountEnabled); ASSERT_EQ(ZE_RESULT_SUCCESS, result); result = event1->hostSignal(); - EXPECT_EQ(false, event1->updateTaskCountEnabled); ASSERT_EQ(ZE_RESULT_SUCCESS, result); result = event2->hostSignal(); - EXPECT_EQ(false, event2->updateTaskCountEnabled); ASSERT_EQ(ZE_RESULT_SUCCESS, result); } } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp index 334dc3a860..c78f64831c 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp @@ -101,25 +101,26 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenAppendWriteGlobalTimestampCalle } HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenAppendWriteGlobalTimestampThenReturnsSuccess, Platforms) { - Mock cmdQueue; + const ze_command_queue_desc_t desc = {}; + bool 
internalEngine = true; + + ze_result_t returnValue; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &desc, + internalEngine, + NEO::EngineGroupType::RenderCompute, + returnValue)); + ASSERT_NE(nullptr, commandList0); + + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); + uint64_t timestampAddress = 0x12345678555500; uint64_t *dstptr = reinterpret_cast(timestampAddress); - auto commandList = std::make_unique>>(); - ASSERT_NE(nullptr, commandList); - ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, ret); - commandList->device = device; - commandList->cmdQImmediate = &cmdQueue; - commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; - - EXPECT_CALL(cmdQueue, executeCommandLists).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); - EXPECT_CALL(cmdQueue, synchronize).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); - - auto result = commandList->appendWriteGlobalTimestamp(dstptr, nullptr, 0, nullptr); + auto result = commandList0->appendWriteGlobalTimestamp(dstptr, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - commandList->cmdQImmediate = nullptr; } HWTEST_F(CommandListCreate, GivenCommandListWhenUnalignedPtrThenLeftMiddleAndRightCopyAdded) { @@ -432,16 +433,20 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenCommandListWhenMemoryFillWithS } HWTEST2_F(HostPointerManagerCommandListTest, givenImmediateCommandListWhenMemoryFillWithSignalAndWaitEventsUsingRenderEngineThenSuccessIsReturned, Platforms) { - Mock cmdQueue; + const ze_command_queue_desc_t desc = {}; + bool internalEngine = true; - ze_result_t result = ZE_RESULT_SUCCESS; - auto commandList = std::make_unique>>(); - ASSERT_NE(nullptr, commandList); - ze_result_t ret = commandList->initialize(device, 
NEO::EngineGroupType::RenderCompute, 0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, ret); - commandList->device = device; - commandList->cmdQImmediate = &cmdQueue; - commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; + ze_result_t ret = ZE_RESULT_SUCCESS; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &desc, + internalEngine, + NEO::EngineGroupType::RenderCompute, + ret)); + ASSERT_NE(nullptr, commandList0); + + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); ret = hostDriverHandle->importExternalPointer(heapPointer, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); @@ -463,29 +468,29 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenImmediateCommandListWhenMemory auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); - EXPECT_CALL(cmdQueue, executeCommandLists).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); - EXPECT_CALL(cmdQueue, synchronize).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); - - result = commandList->appendMemoryFill(heapPointer, reinterpret_cast(&one), sizeof(one), size, - events[0], 1u, &events[1]); - EXPECT_EQ(ZE_RESULT_SUCCESS, result); + ret = commandList0->appendMemoryFill(heapPointer, reinterpret_cast(&one), sizeof(one), size, + events[0], 1u, &events[1]); + EXPECT_EQ(ZE_RESULT_SUCCESS, ret); ret = hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); - commandList->cmdQImmediate = nullptr; } HWTEST2_F(HostPointerManagerCommandListTest, givenImmediateCommandListWhenMemoryFillWithSignalAndWaitEventsUsingCopyEngineThenSuccessIsReturned, Platforms) { - Mock cmdQueue; + const ze_command_queue_desc_t desc = {}; + bool internalEngine = true; - ze_result_t result = ZE_RESULT_SUCCESS; - auto commandList = std::make_unique>>(); - 
ASSERT_NE(nullptr, commandList); - ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, ret); - commandList->device = device; - commandList->cmdQImmediate = &cmdQueue; - commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; + ze_result_t ret = ZE_RESULT_SUCCESS; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &desc, + internalEngine, + NEO::EngineGroupType::Copy, + ret)); + ASSERT_NE(nullptr, commandList0); + + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); ret = hostDriverHandle->importExternalPointer(heapPointer, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); @@ -507,31 +512,31 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenImmediateCommandListWhenMemory auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); - EXPECT_CALL(cmdQueue, executeCommandLists).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); - EXPECT_CALL(cmdQueue, synchronize).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); - - result = commandList->appendMemoryFill(heapPointer, reinterpret_cast(&one), sizeof(one), size, - events[0], 1u, &events[1]); - EXPECT_EQ(ZE_RESULT_SUCCESS, result); + ret = commandList0->appendMemoryFill(heapPointer, reinterpret_cast(&one), sizeof(one), size, + events[0], 1u, &events[1]); + EXPECT_EQ(ZE_RESULT_SUCCESS, ret); ret = hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); - commandList->cmdQImmediate = nullptr; } HWTEST2_F(HostPointerManagerCommandListTest, givenImmediateCommandListWhenMemoryFillWithSignalAndInvalidWaitHandleUsingCopyEngineThenErrorIsReturned, SupportedPlatformsSklIcllp) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - Mock cmdQueue; - ze_result_t result = 
ZE_RESULT_SUCCESS; - auto commandList = std::make_unique>>(); - ASSERT_NE(nullptr, commandList); - ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); - auto &commandContainer = commandList->commandContainer; - ASSERT_EQ(ZE_RESULT_SUCCESS, ret); - commandList->device = device; - commandList->cmdQImmediate = &cmdQueue; - commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; + ze_result_t ret = ZE_RESULT_SUCCESS; + const ze_command_queue_desc_t desc = {}; + bool internalEngine = true; + + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &desc, + internalEngine, + NEO::EngineGroupType::Copy, + ret)); + ASSERT_NE(nullptr, commandList0); + + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); ret = hostDriverHandle->importExternalPointer(heapPointer, MemoryConstants::pageSize); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); @@ -553,21 +558,12 @@ HWTEST2_F(HostPointerManagerCommandListTest, givenImmediateCommandListWhenMemory auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); events.push_back(event1.get()); - result = commandList->appendMemoryFill(heapPointer, reinterpret_cast(&one), sizeof(one), size, - events[0], 1u, nullptr); - EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); + ret = commandList0->appendMemoryFill(heapPointer, reinterpret_cast(&one), sizeof(one), size, + events[0], 1u, nullptr); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, ret); ret = hostDriverHandle->releaseImportedPointer(heapPointer); EXPECT_EQ(ZE_RESULT_SUCCESS, ret); - - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( - cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); - - auto itor = find(cmdList.begin(), cmdList.end()); - EXPECT_EQ(cmdList.end(), itor); - - 
commandList->cmdQImmediate = nullptr; } HWTEST2_F(HostPointerManagerCommandListTest, givenDebugModeToRegisterAllHostPointerWhenFindIsCalledThenRegisterHappens, Platforms) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp index ef9f296f23..1ea0292007 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp @@ -720,13 +720,9 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendW event.waitScope = 0; event2.waitScope = 0; ze_event_handle_t events[] = {&event, &event2}; - auto event_object = L0::Event::fromHandle(events[0]); - auto event_object2 = L0::Event::fromHandle(events[1]); auto used = commandContainer.getCommandStream()->getUsed(); commandList->appendWaitOnEvents(2, events); - EXPECT_EQ(true, event_object->updateTaskCountEnabled); - EXPECT_EQ(true, event_object2->updateTaskCountEnabled); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( @@ -787,13 +783,9 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhe event.waitScope = 0; event2.waitScope = 0; ze_event_handle_t events[] = {&event, &event2}; - auto event_object = L0::Event::fromHandle(events[0]); - auto event_object2 = L0::Event::fromHandle(events[1]); auto used = commandContainer.getCommandStream()->getUsed(); commandList->appendWaitOnEvents(2, events); - EXPECT_EQ(true, event_object->updateTaskCountEnabled); - EXPECT_EQ(true, event_object2->updateTaskCountEnabled); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp index 3debfaba65..bcc4f9e1c2 100644 --- 
a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp @@ -473,95 +473,104 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelLaunchWithTSEventAndScopeFla HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingLaunchKernelThenKernelIsExecutedOnImmediateCmdQ, SklPlusMatcher) { createKernel(); - Mock cmdQueue; + const ze_command_queue_desc_t desc = {}; + bool internalEngine = true; - auto commandList = std::make_unique>>(); - ASSERT_NE(nullptr, commandList); - ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, ret); - commandList->device = device; - commandList->cmdQImmediate = &cmdQueue; - commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; + ze_result_t result = ZE_RESULT_SUCCESS; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &desc, + internalEngine, + NEO::EngineGroupType::RenderCompute, + result)); + ASSERT_NE(nullptr, commandList0); - EXPECT_CALL(cmdQueue, executeCommandLists).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); - EXPECT_CALL(cmdQueue, synchronize).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); ze_group_count_t groupCount{1, 1, 1}; - auto result = commandList->appendLaunchKernel( + result = commandList0->appendLaunchKernel( kernel->toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - commandList->cmdQImmediate = nullptr; } HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingLaunchKernelWithInvalidEventThenInvalidArgumentErrorIsReturned, SklPlusMatcher) { createKernel(); - Mock cmdQueue; + const ze_command_queue_desc_t 
desc = {}; + bool internalEngine = true; - auto commandList = std::make_unique>>(); - ASSERT_NE(nullptr, commandList); - ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, ret); - commandList->device = device; - commandList->cmdQImmediate = &cmdQueue; - commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; + ze_result_t result = ZE_RESULT_SUCCESS; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &desc, + internalEngine, + NEO::EngineGroupType::RenderCompute, + result)); + ASSERT_NE(nullptr, commandList0); + + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); ze_group_count_t groupCount{1, 1, 1}; - auto result = commandList->appendLaunchKernel( + result = commandList0->appendLaunchKernel( kernel->toHandle(), &groupCount, nullptr, 1, nullptr); ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); - commandList->cmdQImmediate = nullptr; } HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingLaunchKernelIndirectThenKernelIsExecutedOnImmediateCmdQ, SklPlusMatcher) { createKernel(); + const ze_command_queue_desc_t desc = {}; + bool internalEngine = true; - Mock cmdQueue; + ze_result_t result = ZE_RESULT_SUCCESS; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &desc, + internalEngine, + NEO::EngineGroupType::RenderCompute, + result)); + ASSERT_NE(nullptr, commandList0); - auto commandList = std::make_unique>>(); - ASSERT_NE(nullptr, commandList); - ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, ret); - commandList->device = device; - commandList->cmdQImmediate = &cmdQueue; - commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; - - EXPECT_CALL(cmdQueue, 
executeCommandLists).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); - EXPECT_CALL(cmdQueue, synchronize).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); ze_group_count_t groupCount{1, 1, 1}; - auto result = commandList->appendLaunchKernelIndirect( + result = commandList0->appendLaunchKernelIndirect( kernel->toHandle(), &groupCount, nullptr, 0, nullptr); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - commandList->cmdQImmediate = nullptr; } HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingLaunchKernelIndirectWithInvalidEventThenInvalidArgumentErrorIsReturned, SklPlusMatcher) { createKernel(); - Mock cmdQueue; + const ze_command_queue_desc_t desc = {}; + bool internalEngine = true; - auto commandList = std::make_unique>>(); - ASSERT_NE(nullptr, commandList); - ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, ret); - commandList->device = device; - commandList->cmdQImmediate = &cmdQueue; - commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; + ze_result_t result = ZE_RESULT_SUCCESS; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &desc, + internalEngine, + NEO::EngineGroupType::RenderCompute, + result)); + ASSERT_NE(nullptr, commandList0); + + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); ze_group_count_t groupCount{1, 1, 1}; - auto result = commandList->appendLaunchKernelIndirect( + result = commandList0->appendLaunchKernelIndirect( kernel->toHandle(), &groupCount, nullptr, 1, nullptr); ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); - commandList->cmdQImmediate = nullptr; } using SupportedPlatforms = 
IsWithinProducts; diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger.cpp index 3825cd6c26..18faf160e5 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger.cpp +++ b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger.cpp @@ -17,6 +17,7 @@ #include "test.h" +#include "level_zero/core/source/image/image_hw.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h" @@ -450,9 +451,41 @@ HWTEST2_F(L0DebuggerTest, givenDebuggingEnabledWhenCommandListIsExecutedThenSbaB } using L0DebuggerInternalUsageTest = L0DebuggerTest; -HWTEST_F(L0DebuggerInternalUsageTest, givenDebuggingEnabledWhenCommandListIsInititalizedOrResetThenCaptureSbaIsNotCalled) { +HWTEST_F(L0DebuggerInternalUsageTest, givenFlushTaskSubmissionEnabledWhenCommandListIsInititalizedOrResetThenCaptureSbaIsNotCalled) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); + + size_t usedSpaceBefore = 0; + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); + + auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); + ASSERT_GE(usedSpaceAfter, usedSpaceBefore); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceAfter)); + + auto sbaItor = find(cmdList.begin(), cmdList.end()); + ASSERT_EQ(cmdList.end(), sbaItor); + + EXPECT_EQ(0u, getMockDebuggerL0Hw()->captureStateBaseAddressCount); + + 
commandList->reset(); + EXPECT_EQ(0u, getMockDebuggerL0Hw()->captureStateBaseAddressCount); + + commandList->destroy(); +} + +HWTEST_F(L0DebuggerInternalUsageTest, givenFlushTaskSubmissionDisabledWhenCommandListIsInititalizedOrResetThenCaptureSbaIsCalled) { + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); + size_t usedSpaceBefore = 0; ze_command_queue_desc_t queueDesc = {}; ze_result_t returnValue = ZE_RESULT_SUCCESS; @@ -496,6 +529,274 @@ HWTEST_F(L0DebuggerInternalUsageTest, givenDebuggerLogsDisabledWhenCommandListIs commandList->destroy(); } +HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendLaunchKernelThenSuccessIsReturned) { + Mock<::L0::Kernel> kernel; + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); + + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + ze_group_count_t groupCount{1, 1, 1}; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); + + auto result = commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + commandList->destroy(); +} + +HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledForImmediateCommandListForAppendLaunchKernelThenSuccessIsReturned) { + Mock<::L0::Kernel> kernel; + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); + + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + ze_group_count_t groupCount{1, 1, 1}; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); + + auto result = 
commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + commandList->destroy(); +} + +HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendLaunchKernelIndirectThenSuccessIsReturned) { + Mock<::L0::Kernel> kernel; + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); + + ze_command_queue_desc_t queueDesc = {}; + queueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + ze_group_count_t groupCount{1, 1, 1}; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); + + auto result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + commandList->destroy(); +} + +HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledForImmediateCommandListForAppendLaunchKernelIndirectThenSuccessIsReturned) { + Mock<::L0::Kernel> kernel; + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); + + ze_command_queue_desc_t queueDesc = {}; + queueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + ze_group_count_t groupCount{1, 1, 1}; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); + + auto result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + commandList->destroy(); +} + +HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendMemoryCopyThenSuccessIsReturned) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); + + 
void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); + + auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x100, nullptr, 0, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + commandList->destroy(); +} + +HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledForImmediateCommandListForAppendMemoryCopyThenSuccessIsReturned) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); + + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); + + auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x100, nullptr, 0, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + commandList->destroy(); +} + +HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendMemoryCopyRegionThenSuccessIsReturned) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); + + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + ze_copy_region_t dstRegion = {}; + ze_copy_region_t srcRegion = {}; + + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); + + auto result = commandList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr); + 
ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + commandList->destroy(); +} + +HWTEST_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledForImmediateCommandListForAppendMemoryCopyRegionThenSuccessIsReturned) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); + + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + ze_copy_region_t dstRegion = {}; + ze_copy_region_t srcRegion = {}; + + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); + + auto result = commandList->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 0, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + commandList->destroy(); +} + +HWTEST2_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendImageCopyRegionThenSuccessIsReturned, IsSklOrAbove) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); + + const ze_command_queue_desc_t queueDesc = {}; + bool internalEngine = true; + + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + + ze_result_t returnValue; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &queueDesc, + internalEngine, + NEO::EngineGroupType::Copy, + returnValue)); + ASSERT_NE(nullptr, commandList0); + + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); + + ze_image_desc_t desc = {}; + desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; + desc.type = ZE_IMAGE_TYPE_3D; + desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; + desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; + desc.width = 11; + desc.height = 13; + 
desc.depth = 17; + + desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A; + desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; + desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1; + desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; + auto imageHWSrc = std::make_unique>>(); + auto imageHWDst = std::make_unique>>(); + imageHWSrc->initialize(device, &desc); + imageHWDst->initialize(device, &desc); + + returnValue = commandList0->appendImageCopy(imageHWDst->toHandle(), imageHWSrc->toHandle(), nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + returnValue = commandList0->appendImageCopyFromMemory(imageHWDst->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + returnValue = commandList0->appendImageCopyToMemory(dstPtr, imageHWSrc->toHandle(), nullptr, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); +} + +HWTEST2_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledForImmediateCommandListForAppendImageCopyRegionThenSuccessIsReturned, IsSklOrAbove) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); + + const ze_command_queue_desc_t queueDesc = {}; + bool internalEngine = true; + + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + + ze_result_t returnValue; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &queueDesc, + internalEngine, + NEO::EngineGroupType::Copy, + returnValue)); + ASSERT_NE(nullptr, commandList0); + + CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); + EXPECT_EQ(cmdQueue->getCsr(), neoDevice->getInternalEngine().commandStreamReceiver); + + ze_image_desc_t desc = {}; + desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC; + desc.type = ZE_IMAGE_TYPE_3D; + desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8; + desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT; + desc.width = 11; + desc.height = 13; + desc.depth = 17; + + desc.format.x = 
ZE_IMAGE_FORMAT_SWIZZLE_A; + desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0; + desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1; + desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X; + auto imageHWSrc = std::make_unique>>(); + auto imageHWDst = std::make_unique>>(); + imageHWSrc->initialize(device, &desc); + imageHWDst->initialize(device, &desc); + + returnValue = commandList0->appendImageCopy(imageHWDst->toHandle(), imageHWSrc->toHandle(), nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + returnValue = commandList0->appendImageCopyFromMemory(imageHWDst->toHandle(), srcPtr, nullptr, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + returnValue = commandList0->appendImageCopyToMemory(dstPtr, imageHWSrc->toHandle(), nullptr, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); +} + +HWTEST2_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionEnabledCommandListAndAppendMemoryCopyCalledInLoopThenMultipleCommandBufferAreUsedAndSuccessIsReturned, IsSklOrAbove) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); + + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); + ASSERT_NE(nullptr, commandList); + + for (uint32_t count = 0; count < 2048; count++) { + auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x100, nullptr, 0, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + } + commandList->destroy(); +} + +HWTEST2_F(L0DebuggerInternalUsageTest, givenUseCsrImmediateSubmissionDisabledCommandListAndAppendMemoryCopyCalledInLoopThenMultipleCommandBufferAreUsedAndSuccessIsReturned, IsSklOrAbove) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); + + void *srcPtr = 
reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); + ASSERT_NE(nullptr, commandList); + + for (uint32_t count = 0; count < 2048; count++) { + auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 0x100, nullptr, 0, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + } + commandList->destroy(); +} + HWTEST2_F(L0DebuggerInternalUsageTest, givenDebuggingEnabledWhenInternalCmdQIsUsedThenDebuggerPathsAreNotExecuted, IsSklOrAbove) { ze_command_queue_desc_t queueDesc = {}; @@ -546,6 +847,224 @@ HWTEST2_F(L0DebuggerInternalUsageTest, givenDebuggingEnabledWhenInternalCmdQIsUs commandList->destroy(); } +HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledWithImmediateCommandListToInvokeNonKernelOperationsThenSuccessIsReturned) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); + + void *dstPtr = nullptr; + ze_device_mem_alloc_desc_t deviceDesc = {}; + ze_host_mem_alloc_desc_t hostDesc = {}; + auto result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, 16384u, 4096u, &dstPtr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = 
{}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_event_handle_t event = nullptr; + + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); + ASSERT_NE(nullptr, eventPool); + + eventPool->createEvent(&eventDesc, &event); + + std::unique_ptr event_object(L0::Event::fromHandle(event)); + ASSERT_NE(nullptr, event_object->csr); + ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); + + returnValue = commandList->appendWaitOnEvents(1, &event); + EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); + + returnValue = commandList->appendBarrier(nullptr, 1, &event); + EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); + + returnValue = commandList->appendSignalEvent(event); + EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); + + returnValue = event_object->hostSignal(); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); + + returnValue = commandList->appendWriteGlobalTimestamp(reinterpret_cast(dstPtr), nullptr, 0, nullptr); + EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); + + returnValue = commandList->appendEventReset(event); + EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); + + context->freeMem(dstPtr); +} + +HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledWithImmediateCommandListToInvokeNonKernelOperationsThenSuccessIsReturned) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); + + void *dstPtr = nullptr; + ze_device_mem_alloc_desc_t deviceDesc = {}; + ze_host_mem_alloc_desc_t hostDesc = {}; + auto result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, 16384u, 4096u, &dstPtr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr 
commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_event_handle_t event = nullptr; + + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); + ASSERT_NE(nullptr, eventPool); + + eventPool->createEvent(&eventDesc, &event); + + std::unique_ptr event_object(L0::Event::fromHandle(event)); + ASSERT_NE(nullptr, event_object->csr); + ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); + + returnValue = commandList->appendWaitOnEvents(1, &event); + EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); + + returnValue = commandList->appendBarrier(nullptr, 1, &event); + EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); + + returnValue = commandList->appendSignalEvent(event); + EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); + + returnValue = event_object->hostSignal(); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); + + returnValue = commandList->appendWriteGlobalTimestamp(reinterpret_cast(dstPtr), nullptr, 0, nullptr); + EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); + + returnValue = commandList->appendEventReset(event); + EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); + + context->freeMem(dstPtr); +} + +HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledForImmediateCommandListForAppendMemoryFillThenSuccessIsReturned) { + DebugManagerStateRestore restorer; + 
NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); + + void *dstPtr = nullptr; + ze_device_mem_alloc_desc_t deviceDesc = {}; + ze_host_mem_alloc_desc_t hostDesc = {}; + auto result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, 16384u, 4096u, &dstPtr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + int pattern = 1; + + ze_command_queue_desc_t queueDesc = {}; + queueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); + + result = commandList->appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + context->freeMem(dstPtr); + commandList->destroy(); +} + +HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledForImmediateCommandListForAppendMemoryFillThenSuccessIsReturned) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); + + void *dstPtr = nullptr; + ze_device_mem_alloc_desc_t deviceDesc = {}; + ze_host_mem_alloc_desc_t hostDesc = {}; + auto result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, 16384u, 4096u, &dstPtr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + int pattern = 1; + + ze_command_queue_desc_t queueDesc = {}; + queueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + ze_result_t returnValue = ZE_RESULT_SUCCESS; + auto commandList = CommandList::createImmediate(productFamily, device, &queueDesc, true, NEO::EngineGroupType::RenderCompute, returnValue); + + result = commandList->appendMemoryFill(dstPtr, reinterpret_cast(&pattern), sizeof(pattern), 4096u, nullptr, 0, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + context->freeMem(dstPtr); + commandList->destroy(); +} + +HWTEST_F(L0DebuggerSimpleTest, 
givenUseCsrImmediateSubmissionEnabledForRegularCommandListForAppendMemoryFillThenSuccessIsReturned) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); + + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue; + auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); + ASSERT_NE(nullptr, commandQueue->commandStream); + + ze_command_list_handle_t commandLists[] = { + CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; + const uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); + + auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + commandQueue->synchronize(0); + + auto commandList = CommandList::fromHandle(commandLists[0]); + commandList->destroy(); + + commandQueue->destroy(); +} + +HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledForRegularCommandListForAppendMemoryFillThenSuccessIsReturned) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(false); + + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue; + auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); + ASSERT_NE(nullptr, commandQueue->commandStream); + + ze_command_list_handle_t commandLists[] = { + CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; + const uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); + + auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + commandQueue->synchronize(0); + + auto 
commandList = CommandList::fromHandle(commandLists[0]); + commandList->destroy(); + + commandQueue->destroy(); +} + HWTEST_F(L0DebuggerSimpleTest, givenNonZeroGpuVasWhenProgrammingSbaTrackingThenCorrectCmdsAreAddedToStream) { using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; auto debugger = std::make_unique>(neoDevice); diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index d8cfdc1f6c..0f2798aa98 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -42,6 +42,7 @@ CFEFusedEUDispatch = -1 ForceAuxTranslationMode = -1 OverrideGpuAddressSpace = -1 OverrideMaxWorkgroupSize = -1 +EnableFlushTaskSubmission = false DoCpuCopyOnReadBuffer = -1 DoCpuCopyOnWriteBuffer = -1 PauseOnEnqueue = -1 diff --git a/shared/source/command_container/cmdcontainer.cpp b/shared/source/command_container/cmdcontainer.cpp index 86d3088925..3cb479f46a 100644 --- a/shared/source/command_container/cmdcontainer.cpp +++ b/shared/source/command_container/cmdcontainer.cpp @@ -67,7 +67,9 @@ ErrorCode CommandContainer::initialize(Device *device) { defaultListCmdBufferSize)); commandStream->replaceGraphicsAllocation(cmdBufferAllocation); - addToResidencyContainer(cmdBufferAllocation); + if (!getFlushTaskUsedForImmediate()) { + addToResidencyContainer(cmdBufferAllocation); + } constexpr size_t heapSize = 65536u; heapHelper = std::unique_ptr(new HeapHelper(device->getMemoryManager(), device->getDefaultEngine().commandStreamReceiver->getInternalAllocationStorage(), device->getNumAvailableDevices() > 1u)); @@ -236,8 +238,11 @@ void CommandContainer::allocateNextCommandBuffer() { commandStream->replaceBuffer(cmdBufferAllocation->getUnderlyingBuffer(), defaultListCmdBufferSize); commandStream->replaceGraphicsAllocation(cmdBufferAllocation); - addToResidencyContainer(cmdBufferAllocation); + if (!getFlushTaskUsedForImmediate()) { + addToResidencyContainer(cmdBufferAllocation); 
+ } } + void CommandContainer::prepareBindfulSsh() { if (ApiSpecificConfig::getBindlessConfiguration()) { if (allocationIndirectHeaps[IndirectHeap::SURFACE_STATE] == nullptr) { diff --git a/shared/source/command_container/cmdcontainer.h b/shared/source/command_container/cmdcontainer.h index b3458a2dcf..d7bc02686a 100644 --- a/shared/source/command_container/cmdcontainer.h +++ b/shared/source/command_container/cmdcontainer.h @@ -108,6 +108,9 @@ class CommandContainer : public NonCopyableOrMovableClass { } HeapContainer sshAllocations; + bool getFlushTaskUsedForImmediate() { return isFlushTaskUsedForImmediate; } + void setFlushTaskUsedForImmediate(bool flushTaskUsedForImmediate) { isFlushTaskUsedForImmediate = flushTaskUsedForImmediate; } + protected: void *iddBlock = nullptr; Device *device = nullptr; @@ -125,6 +128,8 @@ class CommandContainer : public NonCopyableOrMovableClass { std::unique_ptr indirectHeaps[HeapType::NUM_TYPES]; ResidencyContainer residencyContainer; std::vector deallocationContainer; + + bool isFlushTaskUsedForImmediate = false; }; } // namespace NEO diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index af8734b5ab..7317f66bb7 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -151,6 +151,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceExecutionTile, -1, "-1: default, 0+: given DECLARE_DEBUG_VARIABLE(int32_t, OverrideTimestampPacketSize, -1, "-1: default, >0: size in bytes. 
4 and 8 supported for experiments") DECLARE_DEBUG_VARIABLE(int32_t, OverrideMaxWorkGroupCount, -1, "-1: default, >0: Max WG size") DECLARE_DEBUG_VARIABLE(int32_t, OverrideCmdQueueSynchronousMode, -1, "Overrides all command queues synchronous mode: -1: do not override, 0: implicit driver behavior, 1: synchronous, 2: asynchronous") +DECLARE_DEBUG_VARIABLE(bool, EnableFlushTaskSubmission, false, "true: driver uses csr flushTask for immediate submissions, false: driver uses legacy executeCommandList path") /*LOGGING FLAGS*/ DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")