From 1f862b3ee7e55a1100924b3eead81ecd1c9b439b Mon Sep 17 00:00:00 2001 From: Kamil Kopryk Date: Tue, 9 Sep 2025 10:23:13 +0000 Subject: [PATCH] refactor: flush L3 after post sync Related-To: NEO-13163 Signed-off-by: Kamil Kopryk --- opencl/source/api/api.cpp | 2 +- opencl/source/command_queue/command_queue.cpp | 4 +- opencl/source/command_queue/command_queue.h | 19 ++++---- .../source/command_queue/command_queue_hw.h | 4 +- .../command_queue/command_queue_staging.cpp | 2 +- .../cpu_data_transfer_handler.cpp | 4 +- opencl/source/command_queue/enqueue_common.h | 10 ++-- opencl/source/command_queue/finish.h | 21 +++----- opencl/source/command_queue/gpgpu_walker.h | 13 +++-- .../command_queue/gpgpu_walker_base.inl | 5 +- .../hardware_interface_xehp_and_later.inl | 48 ++++--------------- .../source/gen12lp/gpgpu_walker_gen12lp.cpp | 2 +- .../cpu_page_fault_manager_memory_sync.cpp | 2 +- .../memory_manager/migration_controller.cpp | 6 +-- .../source/os_interface/windows/api_win.cpp | 18 +++---- opencl/source/sharings/gl/cl_gl_api.cpp | 4 +- opencl/source/sharings/va/cl_va_api.cpp | 2 +- .../gpgpu_walker_xe2_hpg_core.cpp | 2 +- .../source/xe3_core/gpgpu_walker_xe3_core.cpp | 2 +- .../xe_hpc_core/gpgpu_walker_xe_hpc_core.cpp | 2 +- .../xe_hpg_core/gpgpu_walker_xe_hpg_core.cpp | 2 +- .../api/cl_enqueue_map_buffer_tests.inl | 2 +- ...ine_data_local_id_tests_xehp_and_later.cpp | 2 +- .../aub_multicontext_tests_xehp_and_later.cpp | 4 +- .../compression_aub_tests_xehp_and_later.cpp | 2 +- .../enqueue_copy_image_aub_tests.cpp | 4 +- .../enqueue_kernel_aub_tests.cpp | 6 +-- .../enqueue_printf_kernel_aub_tests.cpp | 2 +- .../enqueue_read_buffer_rect_aub_tests.cpp | 4 +- .../enqueue_read_image_aub_tests.cpp | 6 +-- .../enqueue_write_buffer_rect_aub_tests.cpp | 4 +- .../enqueue_write_image_aub_tests.cpp | 6 +-- .../large_grf_aub_tests_xehp_and_later.cpp | 10 ++-- .../system_memfence_aub_tests_xe_hpc_core.cpp | 2 +- ...less_compression_aub_tests_xe_hpc_core.cpp | 2 +- ...ateless_compression_in_sba_xe_hpg_core.cpp | 12 ++--- .../command_queue/blit_enqueue_1_tests.cpp | 4 +- .../command_queue_hw_1_tests.cpp | 12 ++--- .../command_queue_hw_2_tests.cpp | 16 +++---- .../command_queue/command_queue_tests.cpp | 2 +- .../command_queue/enqueue_barrier_tests.cpp | 4 +- .../enqueue_command_without_kernel_tests.cpp | 4 +- .../command_queue/enqueue_kernel_1_tests.cpp | 4 +- .../enqueue_kernel_event_tests.cpp | 8 ++-- .../enqueue_read_buffer_rect_tests.cpp | 2 +- .../enqueue_read_buffer_tests.cpp | 2 +- .../enqueue_read_image_tests.cpp | 20 ++++---- .../command_queue/enqueue_thread_tests.cpp | 2 +- .../enqueue_write_buffer_rect_tests.cpp | 2 +- .../enqueue_write_buffer_tests.cpp | 2 +- .../enqueue_write_image_tests.cpp | 18 +++---- .../unit_test/command_queue/finish_tests.cpp | 13 +++-- .../command_queue/get_size_required_tests.cpp | 2 +- .../unit_test/command_queue/oom_tests.cpp | 2 +- ...and_stream_receiver_flush_task_2_tests.cpp | 14 +++--- ...and_stream_receiver_flush_task_4_tests.cpp | 16 +++---- .../unit_test/d3d_sharing/d3d_tests_part2.cpp | 8 ++-- .../helpers/timestamp_packet_1_tests.cpp | 22 ++++----- .../unit_test/mem_obj/buffer_bcs_tests.cpp | 2 +- .../image_release_mapped_ptr_tests.cpp | 2 +- ...u_page_fault_manager_memory_sync_tests.cpp | 2 +- .../test/unit_test/mocks/mock_command_queue.h | 9 ++-- .../command_queue/enqueue_kernel_mt_tests.cpp | 2 +- .../unit_test/profiling/profiling_tests.cpp | 6 +-- ...d_write_buffer_scenarios_windows_tests.cpp | 4 +- .../sharings/va/va_sharing_tests.cpp | 2 +- 66 files changed, 205 insertions(+), 245 deletions(-) diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index 754f355894..5a951b07fd 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -2349,7 +2349,7 @@ cl_int CL_API_CALL clFinish(cl_command_queue commandQueue) { auto pCommandQueue = castToObject(commandQueue); retVal = pCommandQueue - ? pCommandQueue->finish() + ? pCommandQueue->finish(false) : CL_INVALID_COMMAND_QUEUE; TRACING_EXIT(ClFinish, &retVal); return retVal; diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index b7bd26ad89..5dd93b485a 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -654,7 +654,7 @@ cl_int CommandQueue::enqueueReleaseSharedObjects(cl_uint numObjects, const cl_me Event::waitForEvents(numEventsInWaitList, eventWaitList); if (!this->isOOQEnabled()) { - this->finish(); + this->finish(false); } bool isImageReleased = false; @@ -679,7 +679,7 @@ cl_int CommandQueue::enqueueReleaseSharedObjects(cl_uint numObjects, const cl_me TakeOwnershipWrapper queueOwnership(*this); this->taskCount = this->getGpgpuCommandStreamReceiver().peekTaskCount(); } - this->finish(); + this->finish(false); } else if (isImageReleased) { this->getGpgpuCommandStreamReceiver().sendRenderStateCacheFlush(); } diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index a16b7cf476..2b3cc649e4 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -222,10 +222,12 @@ class CommandQueue : public BaseObject<_cl_command_queue> { virtual cl_int enqueueResourceBarrier(BarrierCommand *resourceBarrier, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) = 0; - virtual cl_int finish() = 0; + virtual cl_int finish(bool resolvePendingL3Flushes) = 0; virtual cl_int flush() = 0; + virtual void programPendingL3Flushes(CommandStreamReceiver &csr, bool &waitForTaskCountRequired, bool resolvePendingL3Flushes) = 0; + void updateFromCompletionStamp(const CompletionStamp &completionStamp, Event *outEvent); virtual bool isCacheFlushCommand(uint32_t commandType) const { return false; } @@ -436,16 +438,12 @@ class CommandQueue : public BaseObject<_cl_command_queue> { bool waitOnDestructionNeeded() const; - bool getL3FlushDeferredIfNeeded() const { - return l3FlushDeferredIfNeeded; + bool getPendingL3FlushForHostVisibleResources() const { + return pendingL3FlushForHostVisibleResources; } - void setL3FlushDeferredIfNeeded(bool newValue) { - l3FlushDeferredIfNeeded = newValue; - } - - void setCheckIfDeferredL3FlushIsNeeded(bool newValue) { - checkIfDeferredL3FlushIsNeeded = newValue; + void setPendingL3FlushForHostVisibleResources(bool newValue) { + pendingL3FlushForHostVisibleResources = newValue; } protected: @@ -566,8 +564,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { bool l3FlushAfterPostSyncEnabled = false; bool isWalkerWithProfilingEnqueued = false; bool shouldRegisterEnqueuedWalkerWithProfiling = false; - bool l3FlushDeferredIfNeeded = false; - bool checkIfDeferredL3FlushIsNeeded = false; + bool pendingL3FlushForHostVisibleResources = false; }; static_assert(NEO::NonCopyableAndNonMovable); diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h index 4e8a2f6eeb..bd430cae1d 100644 --- a/opencl/source/command_queue/command_queue_hw.h +++ b/opencl/source/command_queue/command_queue_hw.h @@ -365,9 +365,11 @@ class CommandQueueHw : public CommandQueue { const cl_event *eventWaitList, cl_event *event) override; - cl_int finish() override; + cl_int finish(bool resolvePendingL3Flushes) override; cl_int flush() override; + void programPendingL3Flushes(CommandStreamReceiver &csr, bool &waitForTaskCountRequired, bool resolvePendingL3Flushes) override; + template cl_int enqueueHandler(Surface **surfacesForResidency, size_t numSurfaceForResidency, diff --git a/opencl/source/command_queue/command_queue_staging.cpp b/opencl/source/command_queue/command_queue_staging.cpp index 32dca0fed9..8c8eaaace0 100644 --- a/opencl/source/command_queue/command_queue_staging.cpp +++ b/opencl/source/command_queue/command_queue_staging.cpp @@ -165,7 +165,7 @@ cl_int CommandQueue::postStagingTransferSync(const StagingTransferStatus &status } if (isBlocking) { - ret = this->finish(); + ret = this->finish(false); } return ret; } diff --git a/opencl/source/command_queue/cpu_data_transfer_handler.cpp b/opencl/source/command_queue/cpu_data_transfer_handler.cpp index 0be35729da..09bf1e465a 100644 --- a/opencl/source/command_queue/cpu_data_transfer_handler.cpp +++ b/opencl/source/command_queue/cpu_data_transfer_handler.cpp @@ -110,9 +110,7 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie } // wait for the completness of previous commands if (transferProperties.finishRequired) { - this->setCheckIfDeferredL3FlushIsNeeded(true); - auto ret = finish(); - this->setCheckIfDeferredL3FlushIsNeeded(false); + auto ret = finish(true); if (ret != CL_SUCCESS) { err.set(ret); diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 1d4d679ff9..666792b857 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -410,11 +410,9 @@ cl_int CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, } else { UNRECOVERABLE_IF(enqueueProperties.operation != EnqueueProperties::Operation::enqueueWithoutSubmission); - if (this->getL3FlushDeferredIfNeeded()) { + if (this->getPendingL3FlushForHostVisibleResources()) { if (blocking) { - this->setCheckIfDeferredL3FlushIsNeeded(true); - this->finish(); - this->setCheckIfDeferredL3FlushIsNeeded(false); + this->finish(true); } else if (event) { computeCommandStreamReceiver.flushBatchedSubmissions(); @@ -426,7 +424,7 @@ cl_int CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, computeCommandStreamReceiver.obtainCurrentFlushStamp()}; this->updateFromCompletionStamp(completionStamp, nullptr); - this->l3FlushDeferredIfNeeded = false; + this->setPendingL3FlushForHostVisibleResources(false); eventBuilder.getEvent()->setWaitForTaskCountRequired(true); } } @@ -1395,7 +1393,7 @@ cl_int CommandQueueHw::enqueueBlitSplit(MultiDispatchInfo &dispatchIn } if (blocking) { - ret = this->finish(); + ret = this->finish(false); } return ret; diff --git a/opencl/source/command_queue/finish.h b/opencl/source/command_queue/finish.h index bf6ad97bc3..00bc224b78 100644 --- a/opencl/source/command_queue/finish.h +++ b/opencl/source/command_queue/finish.h @@ -14,7 +14,7 @@ namespace NEO { template -cl_int CommandQueueHw::finish() { +cl_int CommandQueueHw::finish(bool resolvePendingL3Flushes) { auto &csr = getGpgpuCommandStreamReceiver(); @@ -24,20 +24,7 @@ cl_int CommandQueueHw::finish() { } bool waitForTaskCountRequired = false; - - if (l3FlushAfterPostSyncEnabled && this->checkIfDeferredL3FlushIsNeeded && this->l3FlushDeferredIfNeeded) { - csr.flushTagUpdate(); - - CompletionStamp completionStamp = { - csr.peekTaskCount(), - std::max(this->taskLevel, csr.peekTaskLevel()), - csr.obtainCurrentFlushStamp()}; - - this->updateFromCompletionStamp(completionStamp, nullptr); - - this->l3FlushDeferredIfNeeded = false; - waitForTaskCountRequired = true; - } + programPendingL3Flushes(csr, waitForTaskCountRequired, resolvePendingL3Flushes); // Stall until HW reaches taskCount on all its engines const auto waitStatus = waitForAllEngines(true, nullptr, waitForTaskCountRequired); @@ -48,4 +35,8 @@ cl_int CommandQueueHw::finish() { return CL_SUCCESS; } +template +void CommandQueueHw::programPendingL3Flushes(CommandStreamReceiver &csr, bool &waitForTaskCountRequired, bool resolvePendingL3Flushes) { +} + } // namespace NEO diff --git a/opencl/source/command_queue/gpgpu_walker.h b/opencl/source/command_queue/gpgpu_walker.h index 56d0d48f67..08ef45af54 100644 --- a/opencl/source/command_queue/gpgpu_walker.h +++ b/opencl/source/command_queue/gpgpu_walker.h @@ -23,6 +23,14 @@ struct RootDeviceEnvironment; template using MI_STORE_REG_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM_CMD; +struct FlushL3Args { + bool containsPrintBuffer; + bool usingSharedObjects; + bool signalEvent; + bool blocking; + bool usingSystemAllocation; +}; + template class GpgpuWalkerHelper { using DefaultWalkerType = typename GfxFamily::DefaultWalkerType; @@ -71,10 +79,7 @@ class GpgpuWalkerHelper { template static void setupTimestampPacketFlushL3( - WalkerType *walkerCmd, - const ProductHelper &productHelper, - bool flushL3AfterPostSyncForHostUsm, - bool flushL3AfterPostSyncForExternalAllocation); + WalkerType &walkerCmd, CommandQueue &commandQueue, const FlushL3Args &args); static void adjustMiStoreRegMemMode(MI_STORE_REG_MEM *storeCmd); diff --git a/opencl/source/command_queue/gpgpu_walker_base.inl b/opencl/source/command_queue/gpgpu_walker_base.inl index 115906f356..b4be291c82 100644 --- a/opencl/source/command_queue/gpgpu_walker_base.inl +++ b/opencl/source/command_queue/gpgpu_walker_base.inl @@ -162,9 +162,6 @@ size_t EnqueueOperation::getSizeRequiredCSNonKernel(bool reserveProfi template template -void GpgpuWalkerHelper::setupTimestampPacketFlushL3(WalkerType *walkerCmd, - const ProductHelper &productHelper, - bool flushL3AfterPostSyncForHostUsm, - bool flushL3AfterPostSyncForExternalAllocation) { +void GpgpuWalkerHelper::setupTimestampPacketFlushL3(WalkerType &walkerCmd, CommandQueue &commandQueue, const FlushL3Args &args) { } } // namespace NEO diff --git a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl index 6ac2e23078..8546349670 100644 --- a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl @@ -85,13 +85,8 @@ inline void HardwareInterface::programWalker( auto &queueCsr = commandQueue.getGpgpuCommandStreamReceiver(); auto &device = commandQueue.getDevice(); auto &rootDeviceEnvironment = device.getRootDeviceEnvironment(); - - bool kernelSystemAllocation = false; - if (kernel.isBuiltInKernel()) { - kernelSystemAllocation = kernel.getDestinationAllocationInSystemMemory(); - } else { - kernelSystemAllocation = kernel.isAnyKernelArgumentUsingSystemMemory(); - } + bool kernelSystemAllocation = kernel.isBuiltInKernel() ? kernel.getDestinationAllocationInSystemMemory() + : kernel.isAnyKernelArgumentUsingSystemMemory(); TagNodeBase *timestampPacketNode = nullptr; if (walkerArgs.currentTimestampPacketNodes && (walkerArgs.currentTimestampPacketNodes->peekNodes().size() > walkerArgs.currentDispatchIndex)) { @@ -106,40 +101,17 @@ inline void HardwareInterface::programWalker( if constexpr (heaplessModeEnabled) { auto &productHelper = rootDeviceEnvironment.getHelper(); - auto containsPrintBuffer = kernel.hasPrintfOutput(); - bool l3FlushDeferredIfNeeded = false; - - bool flushL3AfterPostSyncForHostUsm = kernelSystemAllocation || containsPrintBuffer; - bool flushL3AfterPostSyncForExternalAllocation = kernel.isUsingSharedObjArgs(); - - l3FlushDeferredIfNeeded = flushL3AfterPostSyncForHostUsm || flushL3AfterPostSyncForExternalAllocation; - - if (debugManager.flags.RedirectFlushL3HostUsmToExternal.get() && flushL3AfterPostSyncForHostUsm) { - flushL3AfterPostSyncForHostUsm = false; - flushL3AfterPostSyncForExternalAllocation = true; - } - - bool forceFlushL3 = false; - if (debugManager.flags.ForceFlushL3AfterPostSyncForHostUsm.get()) { - forceFlushL3 = true; - flushL3AfterPostSyncForHostUsm = true; - } - if (debugManager.flags.ForceFlushL3AfterPostSyncForExternalAllocation.get()) { - forceFlushL3 = true; - flushL3AfterPostSyncForExternalAllocation = true; - } - - if (walkerArgs.event != nullptr || walkerArgs.blocking || containsPrintBuffer || forceFlushL3) { - GpgpuWalkerHelper::template setupTimestampPacketFlushL3(&walkerCmd, productHelper, flushL3AfterPostSyncForHostUsm, flushL3AfterPostSyncForExternalAllocation); - l3FlushDeferredIfNeeded = false; - } - - if (l3FlushDeferredIfNeeded) { - commandQueue.setL3FlushDeferredIfNeeded(true); + if (productHelper.isL3FlushAfterPostSyncRequired(true)) { + GpgpuWalkerHelper::setupTimestampPacketFlushL3(walkerCmd, + commandQueue, + FlushL3Args{.containsPrintBuffer = kernel.hasPrintfOutput(), + .usingSharedObjects = kernel.isUsingSharedObjArgs(), + .signalEvent = walkerArgs.event != nullptr, + .blocking = walkerArgs.blocking, + .usingSystemAllocation = kernelSystemAllocation}); } } } - auto isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()); if constexpr (heaplessModeEnabled == false) { diff --git a/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp b/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp index d165b87f5e..4e92eb866a 100644 --- a/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp +++ b/opencl/source/gen12lp/gpgpu_walker_gen12lp.cpp @@ -297,7 +297,7 @@ template void HardwareInterface::dispatchKernelCommands::allocateWalkerSpace(LinearStream &commandStream, const Kernel &kernel); template class GpgpuWalkerHelper; -template void GpgpuWalkerHelper::setupTimestampPacketFlushL3(Family::DefaultWalkerType *walkerCmd, const ProductHelper &productHelper, bool flushL3AfterPostSyncForHostUsm, bool flushL3AfterPostSyncForExternalAllocation); +template void GpgpuWalkerHelper::setupTimestampPacketFlushL3(Family::DefaultWalkerType &walkerCmd, CommandQueue &commandQueue, const FlushL3Args &args); template void GpgpuWalkerHelper::setupTimestampPacket(LinearStream *cmdStream, Family::DefaultWalkerType *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment); template size_t GpgpuWalkerHelper::setGpgpuWalkerThreadData(Family::DefaultWalkerType *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t startWorkGroups[3], const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkGroupOrder); diff --git a/opencl/source/memory_manager/cpu_page_fault_manager_memory_sync.cpp b/opencl/source/memory_manager/cpu_page_fault_manager_memory_sync.cpp index b308bb9e48..8439fdcf9e 100644 --- a/opencl/source/memory_manager/cpu_page_fault_manager_memory_sync.cpp +++ b/opencl/source/memory_manager/cpu_page_fault_manager_memory_sync.cpp @@ -29,7 +29,7 @@ void CpuPageFaultManager::transferToGpu(void *ptr, void *cmdQ) { memoryData[ptr].unifiedMemoryManager->insertSvmMapOperation(ptr, memoryData[ptr].size, ptr, 0, false); auto retVal = commandQueue->enqueueSVMUnmap(ptr, 0, nullptr, nullptr, false); UNRECOVERABLE_IF(retVal); - retVal = commandQueue->finish(); + retVal = commandQueue->finish(false); UNRECOVERABLE_IF(retVal); auto allocData = memoryData[ptr].unifiedMemoryManager->getSVMAlloc(ptr); diff --git a/opencl/source/memory_manager/migration_controller.cpp b/opencl/source/memory_manager/migration_controller.cpp index 6b686919db..78769599bc 100644 --- a/opencl/source/memory_manager/migration_controller.cpp +++ b/opencl/source/memory_manager/migration_controller.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -70,7 +70,7 @@ void MigrationController::migrateMemory(Context &context, MemoryManager &memoryM auto pBuffer = static_cast(memObj); srcCmdQ->enqueueReadBuffer(pBuffer, CL_TRUE, 0u, pBuffer->getSize(), hostPtr, nullptr, 0, nullptr, nullptr); } - srcCmdQ->finish(); + srcCmdQ->finish(false); } if (dstMemory->isAllocationLockable()) { @@ -91,7 +91,7 @@ void MigrationController::migrateMemory(Context &context, MemoryManager &memoryM auto pBuffer = static_cast(memObj); dstCmdQ->enqueueWriteBuffer(pBuffer, CL_TRUE, 0u, pBuffer->getSize(), hostPtr, nullptr, 0, nullptr, nullptr); } - dstCmdQ->finish(); + dstCmdQ->finish(false); } migrationSyncData->setCurrentLocation(targetRootDeviceIndex); } diff --git a/opencl/source/os_interface/windows/api_win.cpp b/opencl/source/os_interface/windows/api_win.cpp index 50575fda6a..00ef697171 100644 --- a/opencl/source/os_interface/windows/api_win.cpp +++ b/opencl/source/os_interface/windows/api_win.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -193,7 +193,7 @@ cl_int CL_API_CALL clEnqueueReleaseDX9ObjectsINTEL(cl_command_queue commandQueue for (unsigned int object = 0; object < numObjects; object++) { auto memObject = castToObject(memObjects[object]); if (!static_cast *>(memObject->peekSharingHandler())->isSharedResource()) { - cmdQ->finish(); + cmdQ->finish(false); break; } } @@ -201,7 +201,7 @@ cl_int CL_API_CALL clEnqueueReleaseDX9ObjectsINTEL(cl_command_queue commandQueue retVal = cmdQ->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL); if (!cmdQ->getContext().getInteropUserSyncEnabled()) { - cmdQ->finish(); + cmdQ->finish(false); } DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); @@ -291,7 +291,7 @@ cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR(cl_command_queue commandQ auto memObject = castToObject(memObjects[object]); if (memObject) { if (!static_cast *>(memObject->peekSharingHandler())->isSharedResource()) { - cmdQ->finish(); + cmdQ->finish(false); break; } @@ -304,7 +304,7 @@ cl_int CL_API_CALL clEnqueueReleaseDX9MediaSurfacesKHR(cl_command_queue commandQ retVal = cmdQ->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR); if (!cmdQ->getContext().getInteropUserSyncEnabled()) { - cmdQ->finish(); + cmdQ->finish(false); } DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); @@ -512,7 +512,7 @@ cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR(cl_command_queue commandQueue return retVal; } if (!static_cast *>(memObject->peekSharingHandler())->isSharedResource()) { - cmdQ->finish(); + cmdQ->finish(false); break; } } @@ -520,7 +520,7 @@ cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR(cl_command_queue commandQueue retVal = cmdQ->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR); if (!cmdQ->getContext().getInteropUserSyncEnabled()) { - cmdQ->finish(); + cmdQ->finish(false); } DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); @@ -725,7 +725,7 @@ cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR(cl_command_queue commandQueue return retVal; } if (!static_cast *>(memObject->peekSharingHandler())->isSharedResource()) { - cmdQ->finish(); + cmdQ->finish(false); break; } } @@ -733,7 +733,7 @@ cl_int CL_API_CALL clEnqueueReleaseD3D11ObjectsKHR(cl_command_queue commandQueue retVal = cmdQ->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR); if (!cmdQ->getContext().getInteropUserSyncEnabled()) { - cmdQ->finish(); + cmdQ->finish(false); } DBG_LOG_INPUTS("event", getClFileLogger().getEvents(reinterpret_cast(event), 1u)); diff --git a/opencl/source/sharings/gl/cl_gl_api.cpp b/opencl/source/sharings/gl/cl_gl_api.cpp index 696e9401dc..f623e4c8a4 100644 --- a/opencl/source/sharings/gl/cl_gl_api.cpp +++ b/opencl/source/sharings/gl/cl_gl_api.cpp @@ -275,9 +275,7 @@ cl_int CL_API_CALL clEnqueueReleaseGLObjects(cl_command_queue commandQueue, cl_u TRACING_EXIT(ClEnqueueReleaseGlObjects, &retVal); return retVal; } - pCommandQueue->setCheckIfDeferredL3FlushIsNeeded(true); - pCommandQueue->finish(); - pCommandQueue->setCheckIfDeferredL3FlushIsNeeded(false); + pCommandQueue->finish(true); retVal = pCommandQueue->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_RELEASE_GL_OBJECTS); diff --git a/opencl/source/sharings/va/cl_va_api.cpp b/opencl/source/sharings/va/cl_va_api.cpp index c849c276ef..3318685f60 100644 --- a/opencl/source/sharings/va/cl_va_api.cpp +++ b/opencl/source/sharings/va/cl_va_api.cpp @@ -136,7 +136,7 @@ clEnqueueReleaseVA_APIMediaSurfacesINTEL(cl_command_queue commandQueue, status = pCommandQueue->enqueueReleaseSharedObjects(numObjects, memObjects, numEventsInWaitList, eventWaitList, event, CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL); if (!pCommandQueue->getContext().getInteropUserSyncEnabled()) { - pCommandQueue->finish(); + pCommandQueue->finish(false); } } return status; diff --git a/opencl/source/xe2_hpg_core/gpgpu_walker_xe2_hpg_core.cpp b/opencl/source/xe2_hpg_core/gpgpu_walker_xe2_hpg_core.cpp index f8311ce0bc..2319e60abd 100644 --- a/opencl/source/xe2_hpg_core/gpgpu_walker_xe2_hpg_core.cpp +++ b/opencl/source/xe2_hpg_core/gpgpu_walker_xe2_hpg_core.cpp @@ -16,7 +16,7 @@ using Family = Xe2HpgCoreFamily; template class GpgpuWalkerHelper; -template void GpgpuWalkerHelper::setupTimestampPacketFlushL3(Family::DefaultWalkerType *walkerCmd, const ProductHelper &productHelper, bool flushL3AfterPostSyncForHostUsm, bool flushL3AfterPostSyncForExternalAllocation); +template void GpgpuWalkerHelper::setupTimestampPacketFlushL3(Family::DefaultWalkerType &walkerCmd, CommandQueue &commandQueue, const FlushL3Args &args); template void GpgpuWalkerHelper::setupTimestampPacket(LinearStream *cmdStream, Family::DefaultWalkerType *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment); template size_t GpgpuWalkerHelper::setGpgpuWalkerThreadData(Family::DefaultWalkerType *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t startWorkGroups[3], const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkGroupOrder); diff --git a/opencl/source/xe3_core/gpgpu_walker_xe3_core.cpp b/opencl/source/xe3_core/gpgpu_walker_xe3_core.cpp index 4b27e7c803..52677c1e45 100644 --- a/opencl/source/xe3_core/gpgpu_walker_xe3_core.cpp +++ b/opencl/source/xe3_core/gpgpu_walker_xe3_core.cpp @@ -15,7 +15,7 @@ using Family = Xe3CoreFamily; template class GpgpuWalkerHelper; -template void GpgpuWalkerHelper::setupTimestampPacketFlushL3(Family::DefaultWalkerType *walkerCmd, const ProductHelper &productHelper, bool flushL3AfterPostSyncForHostUsm, bool flushL3AfterPostSyncForExternalAllocation); +template void GpgpuWalkerHelper::setupTimestampPacketFlushL3(Family::DefaultWalkerType &walkerCmd, CommandQueue &commandQueue, const FlushL3Args &args); template void GpgpuWalkerHelper::setupTimestampPacket(LinearStream *cmdStream, Family::DefaultWalkerType *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment); template size_t GpgpuWalkerHelper::setGpgpuWalkerThreadData(Family::DefaultWalkerType *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t startWorkGroups[3], const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkGroupOrder); diff --git a/opencl/source/xe_hpc_core/gpgpu_walker_xe_hpc_core.cpp b/opencl/source/xe_hpc_core/gpgpu_walker_xe_hpc_core.cpp index 42e235492c..bb0a0b2d33 100644 --- a/opencl/source/xe_hpc_core/gpgpu_walker_xe_hpc_core.cpp +++ b/opencl/source/xe_hpc_core/gpgpu_walker_xe_hpc_core.cpp @@ -24,7 +24,7 @@ void GpgpuWalkerHelper::setSystolicModeEnable(Family::COMPUTE_WALKER *wa template class GpgpuWalkerHelper; -template void GpgpuWalkerHelper::setupTimestampPacketFlushL3(Family::DefaultWalkerType *walkerCmd, const ProductHelper &productHelper, bool flushL3AfterPostSyncForHostUsm, bool flushL3AfterPostSyncForExternalAllocation); +template void GpgpuWalkerHelper::setupTimestampPacketFlushL3(Family::DefaultWalkerType &walkerCmd, CommandQueue &commandQueue, const FlushL3Args &args); template void GpgpuWalkerHelper::setupTimestampPacket(LinearStream *cmdStream, Family::DefaultWalkerType *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment); template size_t GpgpuWalkerHelper::setGpgpuWalkerThreadData(Family::DefaultWalkerType *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t startWorkGroups[3], const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkGroupOrder); diff --git a/opencl/source/xe_hpg_core/gpgpu_walker_xe_hpg_core.cpp b/opencl/source/xe_hpg_core/gpgpu_walker_xe_hpg_core.cpp index 1333b8a45f..35d1fa4489 100644 --- a/opencl/source/xe_hpg_core/gpgpu_walker_xe_hpg_core.cpp +++ b/opencl/source/xe_hpg_core/gpgpu_walker_xe_hpg_core.cpp @@ -24,7 +24,7 @@ void GpgpuWalkerHelper::setSystolicModeEnable(Family::COMPUTE_WALKER *wa template class GpgpuWalkerHelper; -template void GpgpuWalkerHelper::setupTimestampPacketFlushL3(Family::DefaultWalkerType *walkerCmd, const ProductHelper &productHelper, bool flushL3AfterPostSyncForHostUsm, bool flushL3AfterPostSyncForExternalAllocation); +template void GpgpuWalkerHelper::setupTimestampPacketFlushL3(Family::DefaultWalkerType &walkerCmd, CommandQueue &commandQueue, const FlushL3Args &args); template void GpgpuWalkerHelper::setupTimestampPacket(LinearStream *cmdStream, Family::DefaultWalkerType *walkerCmd, TagNodeBase *timestampPacketNode, const RootDeviceEnvironment &rootDeviceEnvironment); template size_t GpgpuWalkerHelper::setGpgpuWalkerThreadData(Family::DefaultWalkerType *walkerCmd, const KernelDescriptor &kernelDescriptor, const size_t startWorkGroups[3], const size_t numWorkGroups[3], const size_t localWorkSizesIn[3], uint32_t simd, uint32_t workDim, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, uint32_t requiredWorkGroupOrder); diff --git a/opencl/test/unit_test/api/cl_enqueue_map_buffer_tests.inl b/opencl/test/unit_test/api/cl_enqueue_map_buffer_tests.inl index f9483a9383..e75ea962d8 100644 --- a/opencl/test/unit_test/api/cl_enqueue_map_buffer_tests.inl +++ b/opencl/test/unit_test/api/cl_enqueue_map_buffer_tests.inl @@ -136,7 +136,7 @@ TEST_F(ClEnqueueMapBufferTests, GivenFinishFailsWhenMappingBufferThenOutOfResour struct MockCommandQueueWithFinishFailure : public MockCommandQueue { MockCommandQueueWithFinishFailure(Context *context) : MockCommandQueue(*context) {} - cl_int finish() override { + cl_int finish(bool resolvePendingL3Flushes) override { return CL_OUT_OF_RESOURCES; } } mockQueue(pContext); diff --git a/opencl/test/unit_test/aub_tests/command_queue/aub_inline_data_local_id_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_queue/aub_inline_data_local_id_tests_xehp_and_later.cpp index d5cd0e0889..c1c447bafc 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/aub_inline_data_local_id_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/aub_inline_data_local_id_tests_xehp_and_later.cpp @@ -505,7 +505,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterAubHwLocalIdsWithSubgroupsTest, givenKe EXPECT_EQ(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.workgroupWalkOrder[i], HwWalkOrderHelper::compatibleDimensionOrders[walker->getWalkOrder()][i]); } - pCmdQ->finish(); + pCmdQ->finish(false); // we expect sequence of local ids from 0..256 auto expectedMemory = reinterpret_cast(variables[0].expectedMemory); diff --git a/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp index 0c71d8b580..fee0bad3b6 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/aub_multicontext_tests_xehp_and_later.cpp @@ -576,8 +576,8 @@ HWTEST2_F(SingleTileDualContextTest, givenSingleAllocationWhenUpdatedFromDiffere commandQueues[0][0]->enqueueWriteBuffer(buffer.get(), CL_FALSE, 0, halfBufferSize, writePattern1, nullptr, 0, nullptr, nullptr); commandQueues[0][1]->enqueueWriteBuffer(buffer.get(), CL_FALSE, halfBufferSize, halfBufferSize, writePattern2, nullptr, 0, nullptr, nullptr); - commandQueues[0][1]->finish(); // submit second enqueue first to make sure that residency flow is correct - commandQueues[0][0]->finish(); + commandQueues[0][1]->finish(false); // submit second enqueue first to make sure that residency flow is correct + commandQueues[0][0]->finish(false); auto gpuPtr = reinterpret_cast(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress() + buffer->getOffset()); expectMemory(gpuPtr, writePattern1, halfBufferSize, 0, 0); diff --git a/opencl/test/unit_test/aub_tests/command_queue/compression_aub_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_queue/compression_aub_tests_xehp_and_later.cpp index 81f29b4bee..c372dff384 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/compression_aub_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/compression_aub_tests_xehp_and_later.cpp @@ -107,7 +107,7 @@ void CompressionXeHPAndLater::givenCompressedBuffersWhenWriting pCmdQ->enqueueWriteBuffer(compressedBuffer.get(), CL_FALSE, 0, bufferSize, writePattern, nullptr, 0, nullptr, nullptr); pCmdQ->enqueueCopyBuffer(compressedBuffer.get(), notCompressedBuffer.get(), 0, 0, bufferSize, 0, nullptr, nullptr); - pCmdQ->finish(); + pCmdQ->finish(false); expectNotEqualMemory(AUBFixture::getGpuPointer(compressedAllocation), writePattern, bufferSize); diff --git a/opencl/test/unit_test/aub_tests/command_queue/enqueue_copy_image_aub_tests.cpp b/opencl/test/unit_test/aub_tests/command_queue/enqueue_copy_image_aub_tests.cpp index 37924fd83c..0d35e83a1e 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/enqueue_copy_image_aub_tests.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/enqueue_copy_image_aub_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -118,7 +118,7 @@ struct AUBCopyImage retVal = pCmdQ->enqueueReadImage(dstImage.get(), CL_FALSE, imgOrigin, imgRegion, 0, 0, dstOutMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); - retVal = pCmdQ->finish(); + retVal = pCmdQ->finish(false); EXPECT_EQ(CL_SUCCESS, retVal); // Offset the source memory diff --git a/opencl/test/unit_test/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp b/opencl/test/unit_test/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp index a51b59ad73..9d6a88a1a5 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/enqueue_kernel_aub_tests.cpp @@ -589,7 +589,7 @@ HWTEST_F(AUBSimpleAtomicTest, givenKernelWithAtomicWhenExecutedThenExpectAtomicV event); ASSERT_EQ(CL_SUCCESS, retVal); - pCmdQ->finish(); + pCmdQ->finish(false); expectMemory(bufferGpuAddress, this->expectedMemory, sizeWrittenMemory - sizeof(int)); size_t testGlobalMax = globalWorkSize[0] * globalWorkSize[1] * globalWorkSize[2]; @@ -1034,7 +1034,7 @@ HWTEST2_F(AUBBindlessKernel, DISABLED_givenBindlessCopyKernelWhenEnqueuedThenRes EXPECT_TRUE(this->kernel->getKernelInfo().kernelDescriptor.payloadMappings.explicitArgs[0].as().isPureStateful()); - this->pCmdQ->finish(); + this->pCmdQ->finish(false); expectMemory(addrToPtr(ptrOffset(pBufferDst->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress(), pBufferDst->getOffset())), bufferDataSrc, bufferSize); } @@ -1129,7 +1129,7 @@ HWTEST2_F(AUBBindlessKernel, DISABLED_givenBindlessCopyImageKernelWhenEnqueuedTh event); EXPECT_EQ(CL_SUCCESS, retVal); - retVal = this->pCmdQ->finish(); + retVal = this->pCmdQ->finish(false); EXPECT_EQ(CL_SUCCESS, retVal); expectMemory(reinterpret_cast(image->getGraphicsAllocation(device->getRootDeviceIndex())->getGpuAddress()), diff --git a/opencl/test/unit_test/aub_tests/command_queue/enqueue_printf_kernel_aub_tests.cpp b/opencl/test/unit_test/aub_tests/command_queue/enqueue_printf_kernel_aub_tests.cpp index 445a669e23..19333bd9ff 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/enqueue_printf_kernel_aub_tests.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/enqueue_printf_kernel_aub_tests.cpp @@ -56,5 +56,5 @@ HWTEST_F(AUBPrintfKernelFixture, GivenPrintfKernelThenEnqueuingSucceeds) { &bufferMem); pCmdQ->enqueueKernel(pKernel, 1, offset, gws, lws, 0, 0, 0); - pCmdQ->finish(); + pCmdQ->finish(false); } diff --git a/opencl/test/unit_test/aub_tests/command_queue/enqueue_read_buffer_rect_aub_tests.cpp b/opencl/test/unit_test/aub_tests/command_queue/enqueue_read_buffer_rect_aub_tests.cpp index e74a8937ee..15884cf1ea 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/enqueue_read_buffer_rect_aub_tests.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/enqueue_read_buffer_rect_aub_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -181,7 +181,7 @@ struct AUBReadBufferRectUnaligned expectMemory(dstMemoryGPUPtr, referenceMemory, offset); expectMemory(ptrOffset(dstMemoryGPUPtr, offset), &srcMemory[rowPitch * bufferOrigin[1]], size); expectMemory(ptrOffset(dstMemoryGPUPtr, size + offset), referenceMemory, bufferSize - offset - size); - pCmdQ->finish(); + pCmdQ->finish(false); alignedFree(dstMemory); } }; diff --git a/opencl/test/unit_test/aub_tests/command_queue/enqueue_read_image_aub_tests.cpp b/opencl/test/unit_test/aub_tests/command_queue/enqueue_read_image_aub_tests.cpp index b272676bef..2bd4f3b6be 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/enqueue_read_image_aub_tests.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/enqueue_read_image_aub_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -148,7 +148,7 @@ struct AUBReadImage nullptr); EXPECT_EQ(CL_SUCCESS, retVal); - retVal = pCmdQ->finish(); + retVal = pCmdQ->finish(false); EXPECT_EQ(CL_SUCCESS, retVal); auto imageMemory = srcMemory; @@ -275,7 +275,7 @@ struct AUBReadImage nullptr); EXPECT_EQ(CL_SUCCESS, retVal); - pCmdQ->finish(); + pCmdQ->finish(false); std::vector referenceMemory(pixelSize * numPixels, 0x0); diff --git a/opencl/test/unit_test/aub_tests/command_queue/enqueue_write_buffer_rect_aub_tests.cpp b/opencl/test/unit_test/aub_tests/command_queue/enqueue_write_buffer_rect_aub_tests.cpp index 45e132fe54..5b168ae5e3 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/enqueue_write_buffer_rect_aub_tests.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/enqueue_write_buffer_rect_aub_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -170,7 +170,7 @@ struct AUBWriteBufferRectUnaligned nullptr); EXPECT_EQ(CL_SUCCESS, retVal); - pCmdQ->finish(); + pCmdQ->finish(false); expectMemory(pDestMemory, referenceMemory, rowPitch); expectMemory(pDestMemory + rowPitch * bufferOrigin[1], ptrOffset(srcMemory, offset), size); diff --git a/opencl/test/unit_test/aub_tests/command_queue/enqueue_write_image_aub_tests.cpp b/opencl/test/unit_test/aub_tests/command_queue/enqueue_write_image_aub_tests.cpp index 3b8ba8d8ef..b24764beca 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/enqueue_write_image_aub_tests.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/enqueue_write_image_aub_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -149,7 +149,7 @@ struct AUBWriteImage retVal = pCmdQ->enqueueReadImage(dstImage.get(), CL_TRUE, imgOrigin, imgRegion, 0, 0, readMemory, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); - retVal = pCmdQ->finish(); + retVal = pCmdQ->finish(false); EXPECT_EQ(CL_SUCCESS, retVal); auto pDstMemory = readMemory; @@ -269,7 +269,7 @@ struct AUBWriteImage nullptr); EXPECT_EQ(CL_SUCCESS, retVal); - pCmdQ->finish(); + pCmdQ->finish(false); EXPECT_EQ(CL_SUCCESS, retVal); auto imageRowPitch = image->getImageDesc().image_row_pitch; diff --git a/opencl/test/unit_test/aub_tests/command_queue/large_grf_aub_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_queue/large_grf_aub_tests_xehp_and_later.cpp index 7df01ecf57..545f312b17 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/large_grf_aub_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/large_grf_aub_tests_xehp_and_later.cpp @@ -338,7 +338,7 @@ HWTEST2_P(LargeGrfTest, givenLargeGrfKernelWhenExecutedThenResultsAreCorrect, Is ASSERT_EQ(CL_SUCCESS, retVal); - pCmdQ->finish(); + pCmdQ->finish(false); auto largeGrfValues = NEO::UnitTestHelper::getProgrammedLargeGrfValues(pCmdQ->getGpgpuCommandStreamReceiver(), pCmdQ->getCS(0)); @@ -377,7 +377,7 @@ HWTEST2_P(LargeGrfTest, givenKernelWithSpillWhenExecutedInLargeGrfThenDontSpillA ASSERT_EQ(CL_SUCCESS, retVal); - pCmdQ->finish(); + pCmdQ->finish(false); auto largeGrfValues = NEO::UnitTestHelper::getProgrammedLargeGrfValues(pCmdQ->getGpgpuCommandStreamReceiver(), pCmdQ->getCS(0)); @@ -435,7 +435,7 @@ HWTEST2_P(LargeGrfTest, givenMixedLargeGrfAndSmallGrfKernelsWhenExecutedThenResu globalWorkSize, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); - pCmdQ->finish(); + pCmdQ->finish(false); auto largeGrfValues = NEO::UnitTestHelper::getProgrammedLargeGrfValues(pCmdQ->getGpgpuCommandStreamReceiver(), pCmdQ->getCS(0)); @@ -536,8 +536,8 @@ HWTEST2_P(MultiContextLargeGrfKernelAubTest, givenLargeAndSmallGrfWhenParallelRu nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); - largeGrfQueue->finish(); - smallGrfQueue->finish(); + largeGrfQueue->finish(false); + smallGrfQueue->finish(false); MulticontextOclAubFixture::expectMemory( AUBFixture::getGpuPointer(destinationBuffer->getGraphicsAllocation(rootDeviceIndex), destinationBuffer->getOffset()), diff --git a/opencl/test/unit_test/aub_tests/xe_hpc_core/system_memfence_aub_tests_xe_hpc_core.cpp b/opencl/test/unit_test/aub_tests/xe_hpc_core/system_memfence_aub_tests_xe_hpc_core.cpp index e0de530bd1..bf353aca00 100644 --- a/opencl/test/unit_test/aub_tests/xe_hpc_core/system_memfence_aub_tests_xe_hpc_core.cpp +++ b/opencl/test/unit_test/aub_tests/xe_hpc_core/system_memfence_aub_tests_xe_hpc_core.cpp @@ -221,7 +221,7 @@ XE_HPC_CORETEST_F(SystemMemFenceViaKernel, givenSystemMemFenceWhenKernelInstruct retVal = commandQueues[0][0]->enqueueKernel(pMultiDeviceKernel->getKernel(rootDeviceIndex), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); - commandQueues[0][0]->finish(); + commandQueues[0][0]->finish(false); expectMemory(hostMemAlloc, buffer.data(), bufferSize, 0, 0); diff --git a/opencl/test/unit_test/aub_tests/xe_hpc_core/um_stateless_compression_aub_tests_xe_hpc_core.cpp b/opencl/test/unit_test/aub_tests/xe_hpc_core/um_stateless_compression_aub_tests_xe_hpc_core.cpp index 6846a109ca..74a76920a8 100644 --- a/opencl/test/unit_test/aub_tests/xe_hpc_core/um_stateless_compression_aub_tests_xe_hpc_core.cpp +++ b/opencl/test/unit_test/aub_tests/xe_hpc_core/um_stateless_compression_aub_tests_xe_hpc_core.cpp @@ -199,7 +199,7 @@ XE_HPC_CORETEST_P(UmStatelessCompressionWithStatefulAccess, givenDeviceMemAllocW retVal = commandQueues[0][0]->enqueueKernel(pMultiDeviceKernel->getKernel(rootDeviceIndex), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); - commandQueues[0][0]->finish(); + commandQueues[0][0]->finish(false); expectMemory(hostMemAlloc, buffer.data(), bufferSize, 0, 0); diff --git a/opencl/test/unit_test/aub_tests/xe_hpg_core/aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp b/opencl/test/unit_test/aub_tests/xe_hpg_core/aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp index 92094084f3..c4ff526de4 100644 --- a/opencl/test/unit_test/aub_tests/xe_hpg_core/aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp +++ b/opencl/test/unit_test/aub_tests/xe_hpg_core/aub_tests_stateless_compression_in_sba_xe_hpg_core.cpp @@ -82,7 +82,7 @@ XE_HPG_CORETEST_P(XeHpgCoreStatelessCompressionInSBA, GENERATEONLY_givenCompress retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); - pCmdQ->finish(); + pCmdQ->finish(false); expectNotEqualMemory(AUBFixture::getGpuPointer(compressedAllocation1), writePattern, bufferSize); @@ -91,7 +91,7 @@ XE_HPG_CORETEST_P(XeHpgCoreStatelessCompressionInSBA, GENERATEONLY_givenCompress retVal = pCmdQ->enqueueCopyBuffer(compressedBuffer2.get(), unCompressedBuffer.get(), 0, 0, bufferSize, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); - pCmdQ->finish(); + pCmdQ->finish(false); expectMemory(AUBFixture::getGpuPointer(unCompressedAllocation), writePattern, bufferSize); } @@ -184,7 +184,7 @@ XE_HPG_CORETEST_P(XeHpgCoreStatelessCompressionInSBA, givenUncompressibleBufferI size_t globalWorkSize[3] = {bufferSize, 1, 1}; retVal = pCmdQ->enqueueKernel(kernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); - pCmdQ->finish(); + pCmdQ->finish(false); expectNotEqualMemory(AUBFixture::getGpuPointer(compressedAllocation), writePattern, bufferSize); @@ -193,7 +193,7 @@ XE_HPG_CORETEST_P(XeHpgCoreStatelessCompressionInSBA, givenUncompressibleBufferI retVal = pCmdQ->enqueueCopyBuffer(uncompressibleBufferInHostMemory.get(), unCompressedBuffer.get(), 0, 0, bufferSize, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); - pCmdQ->finish(); + pCmdQ->finish(false); expectMemory(AUBFixture::getGpuPointer(unCompressedAllocation), writePattern, bufferSize); } @@ -532,7 +532,7 @@ XE_HPG_CORETEST_F(XeHpgCoreStatelessCompressionInSBAWithBCS, GENERATEONLY_givenC retVal = commandQueues[0][0]->enqueueKernel(kernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); - commandQueues[0][0]->finish(); + commandQueues[0][0]->finish(false); expectMemoryNotEqual(AUBFixture::getGpuPointer(compressedAllocation, compressedBuffer->getOffset()), writePattern, bufferSize, 0, 0); @@ -568,7 +568,7 @@ XE_HPG_CORETEST_F(XeHpgCoreStatelessCompressionInSBAWithBCS, givenUncompressible retVal = commandQueues[0][0]->enqueueKernel(kernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, retVal); - commandQueues[0][0]->finish(); + commandQueues[0][0]->finish(false); expectMemoryNotEqual(AUBFixture::getGpuPointer(compressedAllocation, compressedBuffer->getOffset()), writePattern, bufferSize, 0, 0); diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp index 4cd854d7db..db64782e9e 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp @@ -863,7 +863,7 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithNoTimestampPacketTests, givenNoTimestampPacket char cpuBuffer[bufferSize]{}; commandQueue->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, bufferSize, cpuBuffer, nullptr, 0, nullptr, nullptr); - commandQueue->finish(); + commandQueue->finish(false); auto bcsCommands = getCmdList(bcsCsr->getCS(0), 0); auto ccsCommands = getCmdList(commandQueue->getCS(0), 0); @@ -1466,7 +1466,7 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenWaitlistWithTimestampPacketWh mockCmdQueue->flush(); EXPECT_EQ(deferredNodesCount, deferredTimestampPackets->peekNodes().size()); - mockCmdQueue->finish(); + mockCmdQueue->finish(false); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); } diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp index 4b277151a8..4e5b6bb587 100644 --- a/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp @@ -415,7 +415,7 @@ HWTEST_F(CommandQueueHwTest, GivenNonEmptyQueueOnBlockingWhenMappingBufferThenWi : CommandQueueHw(context, device, 0, false) { finishWasCalled = false; } - cl_int finish() override { + cl_int finish(bool resolvePendingL3Flushes) override { finishWasCalled = true; return 0; } @@ -462,7 +462,7 @@ HWTEST2_F(CommandQueueHwTest, GivenFillBufferBlockedOnUserEventWhenEventIsAborte auto waitingEvent = castToObject(clWaitingEvent); clSetUserEventStatus(clUserEvent, CL_INVALID_VALUE); - cmdQ.finish(); + cmdQ.finish(false); auto timestampPacketNodes = waitingEvent->getTimestampPacketNodes(); ASSERT_NE(timestampPacketNodes, nullptr); @@ -1177,7 +1177,7 @@ HWTEST_F(CommandQueueHwTest, givenCsrClientWhenCallingSyncPointsThenUnregister) EXPECT_EQ(baseNumClients + 1, csr.getNumClients()); - mockCmdQueueHw.finish(); + mockCmdQueueHw.finish(false); EXPECT_EQ(baseNumClients, csr.getNumClients()); // queue synchronized @@ -1276,7 +1276,7 @@ HWTEST_F(CommandQueueHwTest, givenFinishWhenFlushBatchedSubmissionsFailsThenErro MockCommandQueueHwWithOverwrittenCsr cmdQueue(context, pClDevice, nullptr, false); MockCommandStreamReceiverWithFailingFlushBatchedSubmission csr(*pDevice->executionEnvironment, 0, pDevice->getDeviceBitfield()); cmdQueue.csr = &csr; - cl_int errorCode = cmdQueue.finish(); + cl_int errorCode = cmdQueue.finish(false); EXPECT_EQ(CL_OUT_OF_RESOURCES, errorCode); } @@ -1286,7 +1286,7 @@ HWTEST_F(CommandQueueHwTest, givenGpuHangWhenFinishingCommandQueueHwThenWaitForE mockCmdQueueHw.waitForAllEnginesReturnValue = WaitStatus::gpuHang; mockCmdQueueHw.getUltCommandStreamReceiver().shouldFlushBatchedSubmissionsReturnSuccess = true; - const auto finishResult = mockCmdQueueHw.finish(); + const auto finishResult = mockCmdQueueHw.finish(false); EXPECT_EQ(1, mockCmdQueueHw.waitForAllEnginesCalledCount); EXPECT_EQ(CL_OUT_OF_RESOURCES, finishResult); } @@ -1297,7 +1297,7 @@ HWTEST_F(CommandQueueHwTest, givenNoGpuHangWhenFinishingCommandQueueHwThenWaitFo mockCmdQueueHw.waitForAllEnginesReturnValue = WaitStatus::ready; mockCmdQueueHw.getUltCommandStreamReceiver().shouldFlushBatchedSubmissionsReturnSuccess = true; - const auto finishResult = mockCmdQueueHw.finish(); + const auto finishResult = mockCmdQueueHw.finish(false); EXPECT_EQ(1, mockCmdQueueHw.waitForAllEnginesCalledCount); EXPECT_EQ(CL_SUCCESS, finishResult); } diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp index 5173bedf44..fb896049b3 100644 --- a/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp @@ -104,8 +104,8 @@ HWTEST_F(MultiIoqCmdQSynchronizationTest, givenTwoIoqCmdQsWhenEnqueuesSynchroniz EXPECT_TRUE(pipeControlForBcsSemaphoreFound); } - EXPECT_EQ(CL_SUCCESS, pCmdQ->finish()); - EXPECT_EQ(CL_SUCCESS, pCmdQ2->finish()); + EXPECT_EQ(CL_SUCCESS, pCmdQ->finish(false)); + EXPECT_EQ(CL_SUCCESS, pCmdQ2->finish(false)); clReleaseEvent(outEvent); // tearDown @@ -1367,7 +1367,7 @@ HWTEST_F(OoqCommandQueueHwBlitTest, givenBlitAfterBarrierWhenEnqueueingCommandTh EXPECT_EQ(bcsHwParser.cmdList.end(), pipeControlItor); } - EXPECT_EQ(CL_SUCCESS, pCmdQ->finish()); + EXPECT_EQ(CL_SUCCESS, pCmdQ->finish(false)); } HWTEST_F(OoqCommandQueueHwBlitTest, givenBlitBeforeBarrierWhenEnqueueingCommandThenWaitForBlitBeforeBarrier) { @@ -1454,7 +1454,7 @@ HWTEST_F(OoqCommandQueueHwBlitTest, givenBlitBeforeBarrierWhenEnqueueingCommandT EXPECT_EQ(1u, findAll(bcsHwParser.cmdList.begin(), blitItor).size()); } - EXPECT_EQ(CL_SUCCESS, pCmdQ->finish()); + EXPECT_EQ(CL_SUCCESS, pCmdQ->finish(false)); } HWTEST_F(OoqCommandQueueHwBlitTest, givenBlockedBlitAfterBarrierWhenEnqueueingCommandThenWaitForBlitBeforeBarrier) { @@ -1518,7 +1518,7 @@ HWTEST_F(OoqCommandQueueHwBlitTest, givenBlockedBlitAfterBarrierWhenEnqueueingCo EXPECT_EQ(bcsHwParser.cmdList.end(), find(semaphoreItor, bcsHwParser.cmdList.end())); } - EXPECT_EQ(CL_SUCCESS, pCmdQ->finish()); + EXPECT_EQ(CL_SUCCESS, pCmdQ->finish(false)); } HWTEST_F(CommandQueueHwTest, GivenBuiltinKernelWhenBuiltinDispatchInfoBuilderIsProvidedThenThisBuilderIsUsedForCreatingDispatchInfo) { @@ -1630,7 +1630,7 @@ HWTEST_F(ImageTextureCacheFlushTest, givenTextureCacheFlushNotRequiredWhenEnqueu auto pipeControls = findAll(ccsHwParser.cmdList.begin(), ccsHwParser.cmdList.end()); EXPECT_TRUE(pipeControls.empty()); - EXPECT_EQ(CL_SUCCESS, pCmdQ->finish()); + EXPECT_EQ(CL_SUCCESS, pCmdQ->finish(false)); } HWTEST_F(ImageTextureCacheFlushTest, givenTextureCacheFlushRequiredWhenEnqueueReadImageThenNoCacheFlushSubmitted) { @@ -1668,7 +1668,7 @@ HWTEST_F(ImageTextureCacheFlushTest, givenTextureCacheFlushRequiredWhenEnqueueRe auto pipeControls = findAll(ccsHwParser.cmdList.begin(), ccsHwParser.cmdList.end()); EXPECT_TRUE(pipeControls.empty()); - EXPECT_EQ(CL_SUCCESS, pCmdQ->finish()); + EXPECT_EQ(CL_SUCCESS, pCmdQ->finish(false)); } HWTEST_F(ImageTextureCacheFlushTest, givenTextureCacheFlushRequiredWhenEnqueueWriteImageThenCacheFlushSubmitted) { @@ -1717,7 +1717,7 @@ HWTEST_F(ImageTextureCacheFlushTest, givenTextureCacheFlushRequiredWhenEnqueueWr } } EXPECT_TRUE(isPipeControlWithTextureCacheFlush); - EXPECT_EQ(CL_SUCCESS, pCmdQ->finish()); + EXPECT_EQ(CL_SUCCESS, pCmdQ->finish(false)); } HWTEST_F(IoqCommandQueueHwBlitTest, givenImageWithHostPtrWhenCreateImageThenStopRegularBcs) { diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index 5c25892821..b97c3ec809 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -1115,7 +1115,7 @@ HWTEST_F(WaitForQueueCompletionTests, givenBlockingCallAndBlockedQueueWhenEnqueu HWTEST_F(WaitForQueueCompletionTests, whenFinishIsCalledThenCallWaitWithoutQuickKmdSleepRequest) { std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get())); - cmdQ->finish(); + cmdQ->finish(false); EXPECT_EQ(1u, cmdQ->waitUntilCompleteCounter); EXPECT_FALSE(cmdQ->requestedUseQuickKmdSleep); } diff --git a/opencl/test/unit_test/command_queue/enqueue_barrier_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_barrier_tests.cpp index 51b8b8211f..a1fa3e25b8 100644 --- a/opencl/test/unit_test/command_queue/enqueue_barrier_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_barrier_tests.cpp @@ -270,7 +270,7 @@ HWTEST_F(BarrierTest, givenBlockedCommandQueueAndEnqueueBarrierWithWaitlistRetur EXPECT_EQ(pEvent->peekTaskCount(), CompletionStamp::notReady); event2.setStatus(CL_COMPLETE); clReleaseEvent(event); - pCmdQ->finish(); + pCmdQ->finish(false); } HWTEST_F(BarrierTest, givenEmptyCommandStreamAndBlockedBarrierCommandWhenUserEventIsSignaledThenNewCommandStreamIsNotAcquired) { @@ -314,5 +314,5 @@ HWTEST_F(BarrierTest, givenEmptyCommandStreamAndBlockedBarrierCommandWhenUserEve EXPECT_GE(commandStream.getMaxAvailableSpace(), commandStream.getMaxAvailableSpace()); clReleaseEvent(event); - pCmdQ->finish(); + pCmdQ->finish(false); } diff --git a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp index e860bb7a38..ec3eaa1c52 100644 --- a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp @@ -594,7 +594,7 @@ HWTEST_F(EnqueueHandlerTest, givenEnableL3FlushAfterPostSyncWithSignalingEventWh auto event = std::make_unique>(context, nullptr, 0, 0, 0); cl_event clEvent = event.get(); - mockCmdQ->setL3FlushDeferredIfNeeded(true); + mockCmdQ->setPendingL3FlushForHostVisibleResources(true); MultiDispatchInfo multiDispatch; const auto enqueueResult = mockCmdQ->template enqueueHandler(nullptr, 0, false, multiDispatch, 0, nullptr, &clEvent); @@ -624,7 +624,7 @@ HWTEST_F(EnqueueHandlerTest, givenL3FlushDeferredIfNeededWhenEnqueueWithoutKerne csr.timestampPacketAllocator.reset(mockTagAllocator); auto mockCmdQ = std::make_unique>(context, pClDevice, nullptr); - mockCmdQ->setL3FlushDeferredIfNeeded(true); + mockCmdQ->setPendingL3FlushForHostVisibleResources(true); MultiDispatchInfo multiDispatch; auto finishCalledCountBefore = mockCmdQ->finishCalledCount; diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index c482db362e..ed78a8f296 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -1088,7 +1088,7 @@ HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenCsrInBatchingModeWhenFi pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); - pCmdQ->finish(); + pCmdQ->finish(false); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); EXPECT_EQ(mockCsr->heaplessStateInitialized ? 2u : 1u, mockCsr->flushCalledCount); @@ -1113,7 +1113,7 @@ HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenCsrInBatchingModeWhenTh pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); - pCmdQ->finish(); + pCmdQ->finish(false); EXPECT_TRUE(mockedSubmissionsAggregator->peekCmdBufferList().peekIsEmpty()); EXPECT_EQ(mockCsr->heaplessStateInitialized ? 2u : 1u, mockCsr->flushCalledCount); diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp index 88a7cbb6c1..a2557a0d0f 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_event_tests.cpp @@ -97,7 +97,7 @@ TEST_F(EventTests, WhenWaitingForEventThenPipeControlIsNotInserted) { } EXPECT_EQ(expectedTaskLevel, csr.peekTaskLevel()); - pCmdQ->finish(); + pCmdQ->finish(false); // Check CL_EVENT_COMMAND_TYPE { @@ -145,7 +145,7 @@ TEST_F(EventTests, GivenTwoEnqueuesWhenWaitingForBothEventsThenTaskLevelIsCorrec expectedTaskLevel1++; } EXPECT_EQ(expectedTaskLevel1, csr.peekTaskLevel()); - pCmdQ->finish(); + pCmdQ->finish(false); EXPECT_EQ(expectedTaskLevel1, csr.peekTaskLevel()); // Check CL_EVENT_COMMAND_TYPE { @@ -198,7 +198,7 @@ TEST_F(EventTests, GivenNoEventsWhenEnqueuingKernelThenTaskLevelIsIncremented) { } EXPECT_EQ(taskLevelEvent, csr.peekTaskLevel()); - pCmdQ->finish(); + pCmdQ->finish(false); EXPECT_EQ(taskLevelEvent, csr.peekTaskLevel()); // Check CL_EVENT_COMMAND_TYPE @@ -281,6 +281,6 @@ HWTEST_F(EventTests, givenEnqueueKernelBlockedOnserEventWhenEnqueueHasOutEventWi EXPECT_NE(pipeControlItor, ccsHwParser.cmdList.end()); } - EXPECT_EQ(CL_SUCCESS, pCmdQ->finish()); + EXPECT_EQ(CL_SUCCESS, pCmdQ->finish(false)); clReleaseEvent(outEvent); } \ No newline at end of file diff --git a/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp index d3afce5ecf..d6cbc8b5b1 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp @@ -596,7 +596,7 @@ HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtr ASSERT_NE(0u, cmdQ->lastEnqueuedKernels.size()); Kernel *kernel = cmdQ->lastEnqueuedKernels[0]; - cmdQ->finish(); + cmdQ->finish(false); parseCommands(*cmdQ); diff --git a/opencl/test/unit_test/command_queue/enqueue_read_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_buffer_tests.cpp index a84bc1b8a0..46a39e35e1 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_buffer_tests.cpp @@ -915,7 +915,7 @@ HWTEST_F(ReadBufferStagingBufferTest, whenHostPtrRegisteredThenDontUseStagingUnt EXPECT_TRUE(mockCommandQueueHw.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_READ_BUFFER, false, false)); EXPECT_FALSE(mockCommandQueueHw.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_READ_BUFFER, false, false)); - mockCommandQueueHw.finish(); + mockCommandQueueHw.finish(false); EXPECT_TRUE(mockCommandQueueHw.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_READ_BUFFER, false, false)); } diff --git a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp index 282c1dceee..caeffd0235 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp @@ -94,7 +94,7 @@ HWTEST_F(EnqueueReadImageTest, whenEnqueueReadImageThenBuiltinKernelIsResolved) EXPECT_TRUE(pCommand->peekKernel()->isPatched()); userEvent.setStatus(CL_COMPLETE); pEvent->release(); - pCmdQ->finish(); + pCmdQ->finish(false); } template @@ -266,7 +266,7 @@ HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageThen EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); pEvent->release(); - pCmdQ1->finish(); + pCmdQ1->finish(false); pCmdQ1->release(); pImage->release(); } @@ -347,7 +347,7 @@ HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageIsCa EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); pEvent0->release(); pEvent1->release(); - pCmdQ1->finish(); + pCmdQ1->finish(false); pCmdQ1->release(); pImage->release(); } @@ -370,7 +370,7 @@ HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueRea EXPECT_TRUE(ultCsr.flushBatchedSubmissionsCalled); EXPECT_TRUE(ultCsr.flushTagUpdateCalled); EXPECT_LT(currentTaskCount, ultCsr.peekTaskCount()); - pCmdQ1->finish(); + pCmdQ1->finish(false); pCmdQ1->release(); pImage->release(); } @@ -389,7 +389,7 @@ HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueRea EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE); EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); - pCmdQ1->finish(); + pCmdQ1->finish(false); { HardwareParse hwParser; @@ -444,7 +444,7 @@ HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageIsCa EXPECT_TRUE(ultCsr1.flushTagUpdateCalled); EXPECT_FALSE(ultCsr2.flushBatchedSubmissionsCalled); EXPECT_LT(currentTaskCount1, ultCsr1.peekTaskCount()); - pCmdQ1->finish(); + pCmdQ1->finish(false); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ2, pImage, CL_FALSE, EnqueueReadImageTraits::origin, @@ -461,7 +461,7 @@ HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageIsCa EXPECT_TRUE(ultCsr2.flushBatchedSubmissionsCalled); EXPECT_TRUE(ultCsr2.flushTagUpdateCalled); EXPECT_LT(currentTaskCount2, ultCsr2.peekTaskCount()); - pCmdQ2->finish(); + pCmdQ2->finish(false); EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE, EnqueueReadImageTraits::origin, @@ -475,7 +475,7 @@ HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageIsCa nullptr); EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); - pCmdQ1->finish(); + pCmdQ1->finish(false); pCmdQ1->release(); pCmdQ2->release(); pImage->release(); @@ -536,7 +536,7 @@ HWTEST2_F(EnqueueReadImageTest, givenImageFromBufferThatRequiresMigrationWhenEnq EXPECT_EQ(0u, pBuffer->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); pEvent->release(); - pCmdQ1->finish(); + pCmdQ1->finish(false); pCmdQ1->release(); pImage->release(); pBuffer->release(); @@ -1094,7 +1094,7 @@ HWTEST_F(EnqueueReadImageTest, whenEnqueueReadImageWithUsmPtrThenDontImportAlloc 0u, nullptr, nullptr); - pCmdQ->finish(); + pCmdQ->finish(false); auto &csr = pDevice->getUltCommandStreamReceiver(); EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled); diff --git a/opencl/test/unit_test/command_queue/enqueue_thread_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_thread_tests.cpp index 825e48c560..8e181fa1cd 100644 --- a/opencl/test/unit_test/command_queue/enqueue_thread_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_thread_tests.cpp @@ -490,6 +490,6 @@ HWTEST_F(EnqueueThreading, WhenFinishingThenKernelHasOwnership) { csr->latestSentTaskCount = 1; csr->latestFlushedTaskCount = 1; - pCmdQ->finish(); + pCmdQ->finish(false); } } // namespace ULT diff --git a/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp index db39087825..4866c41c60 100644 --- a/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_write_buffer_rect_tests.cpp @@ -602,7 +602,7 @@ HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtr ASSERT_NE(0u, cmdQ->lastEnqueuedKernels.size()); Kernel *kernel = cmdQ->lastEnqueuedKernels[0]; - cmdQ->finish(); + cmdQ->finish(false); parseCommands(*cmdQ); auto &kernelInfo = kernel->getKernelInfo(); diff --git a/opencl/test/unit_test/command_queue/enqueue_write_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_write_buffer_tests.cpp index bca29b934b..d9eb562b1f 100644 --- a/opencl/test/unit_test/command_queue/enqueue_write_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_write_buffer_tests.cpp @@ -695,7 +695,7 @@ HWTEST_F(WriteBufferStagingBufferTest, whenHostPtrRegisteredThenDontUseStagingUn EXPECT_TRUE(mockCommandQueueHw.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_WRITE_BUFFER, false, false)); EXPECT_FALSE(mockCommandQueueHw.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_WRITE_BUFFER, false, false)); - mockCommandQueueHw.finish(); + mockCommandQueueHw.finish(false); EXPECT_TRUE(mockCommandQueueHw.isValidForStagingTransfer(&buffer, ptr, MemoryConstants::cacheLineSize, CL_COMMAND_WRITE_BUFFER, false, false)); } diff --git a/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp index 17e425e584..93643d879b 100644 --- a/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp @@ -574,7 +574,7 @@ HWTEST_F(EnqueueWriteImageTest, whenEnqueueWriteImageThenBuiltinKernelIsResolved EXPECT_TRUE(pCommand->peekKernel()->isPatched()); userEvent.setStatus(CL_COMPLETE); pEvent->release(); - pCmdQ->finish(); + pCmdQ->finish(false); } HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenEnqueueWriteImageThenKernelRequiresMigration) { @@ -622,7 +622,7 @@ HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenEnqueueWriteImageTh EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); pEvent->release(); - pCmdQ1->finish(); + pCmdQ1->finish(false); pCmdQ1->release(); pImage->release(); } @@ -703,7 +703,7 @@ HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenEnqueueWriteImageIs EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); pEvent0->release(); pEvent1->release(); - pCmdQ1->finish(); + pCmdQ1->finish(false); pCmdQ1->release(); pImage->release(); } @@ -726,7 +726,7 @@ HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueWr EXPECT_TRUE(ultCsr.flushBatchedSubmissionsCalled); EXPECT_TRUE(ultCsr.flushTagUpdateCalled); EXPECT_LT(currentTaskCount, ultCsr.peekTaskCount()); - pCmdQ1->finish(); + pCmdQ1->finish(false); pCmdQ1->release(); pImage->release(); } @@ -748,7 +748,7 @@ HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueWr EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ1, pImage, CL_FALSE); EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); - pCmdQ1->finish(); + pCmdQ1->finish(false); { HardwareParse hwParser; @@ -788,7 +788,7 @@ HWTEST_F(EnqueueWriteImageTest, whenEnqueueWriteImageWithUsmPtrThenDontImportAll nullptr, nullptr); EXPECT_EQ(res, CL_SUCCESS); - pCmdQ->finish(); + pCmdQ->finish(false); auto &csr = pDevice->getUltCommandStreamReceiver(); EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled); @@ -813,7 +813,7 @@ HWTEST_F(EnqueueWriteImageTest, whenEnqueueWriteImageWithUsmPtrAndSizeLowerThanR nullptr, nullptr); EXPECT_EQ(res, CL_INVALID_OPERATION); - pCmdQ->finish(); + pCmdQ->finish(false); svmManager->freeSVMAlloc(usmPtr); } @@ -823,11 +823,11 @@ HWTEST_F(EnqueueWriteImageTest, whenisValidForStagingTransferCalledThenReturnCor std::unique_ptr image(Image1dHelperUlt<>::create(context)); EXPECT_EQ(isStagingBuffersEnabled, pCmdQ->isValidForStagingTransfer(image.get(), ptr, image->getSize(), CL_COMMAND_WRITE_IMAGE, false, false)); - pCmdQ->finish(); + pCmdQ->finish(false); image.reset(Image2dHelperUlt<>::create(context)); EXPECT_EQ(isStagingBuffersEnabled, pCmdQ->isValidForStagingTransfer(image.get(), ptr, image->getSize(), CL_COMMAND_WRITE_IMAGE, false, false)); - pCmdQ->finish(); + pCmdQ->finish(false); image.reset(Image3dHelperUlt<>::create(context)); EXPECT_EQ(isStagingBuffersEnabled, pCmdQ->isValidForStagingTransfer(image.get(), ptr, image->getSize(), CL_COMMAND_WRITE_IMAGE, false, false)); diff --git a/opencl/test/unit_test/command_queue/finish_tests.cpp b/opencl/test/unit_test/command_queue/finish_tests.cpp index 089d1d8225..5fd5233757 100644 --- a/opencl/test/unit_test/command_queue/finish_tests.cpp +++ b/opencl/test/unit_test/command_queue/finish_tests.cpp @@ -58,7 +58,7 @@ HWTEST_F(FinishTest, GivenCsGreaterThanCqWhenFinishIsCalledThenPipeControlIsNotA commandStreamReceiver.taskLevel = originalCSRLevel; // Must be greater than or equal to HW pCmdQ->taskLevel = originalCQLevel; - auto retVal = pCmdQ->finish(); + auto retVal = pCmdQ->finish(false); ASSERT_EQ(CL_SUCCESS, retVal); // Don't need to artificially execute PIPE_CONTROL. @@ -80,7 +80,7 @@ HWTEST_F(FinishTest, GivenCsGreaterThanCqWhenFinishIsCalledThenPipeControlIsNotA HWTEST_F(FinishTest, WhenFinishIsCalledThenPipeControlIsNotAddedToCqCommandStream) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; - auto retVal = pCmdQ->finish(); + auto retVal = pCmdQ->finish(false); ASSERT_EQ(CL_SUCCESS, retVal); // Check for PIPE_CONTROL @@ -93,7 +93,7 @@ HWTEST_F(FinishTest, givenFreshQueueWhenFinishIsCalledThenCommandStreamIsNotAllo MockContext contextWithMockCmdQ(pClDevice, true); MockCommandQueueHw cmdQ(&contextWithMockCmdQ, pClDevice, 0); - auto retVal = cmdQ.finish(); + auto retVal = cmdQ.finish(false); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(nullptr, cmdQ.peekCommandStream()); @@ -113,16 +113,15 @@ HWTEST_F(FinishTest, givenL3FlushAfterPostSyncEnabledWhenFlushTagUpdateIsCalledT MockContext contextWithMockCmdQ(pClDevice, true); MockCommandQueueHw cmdQ(&contextWithMockCmdQ, pClDevice, 0); - cmdQ.setL3FlushDeferredIfNeeded(true); + cmdQ.setPendingL3FlushForHostVisibleResources(true); cmdQ.l3FlushAfterPostSyncEnabled = true; - cmdQ.setCheckIfDeferredL3FlushIsNeeded(true); auto &csr = cmdQ.getUltCommandStreamReceiver(); auto used = csr.commandStream.getUsed(); auto taskCountBeforeFinish = csr.taskCount.load(); auto beforeWaitForAllEnginesCalledCount = cmdQ.waitForAllEnginesCalledCount; - auto retVal = cmdQ.finish(); + auto retVal = cmdQ.finish(true); ASSERT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(taskCountBeforeFinish + 1, cmdQ.latestTaskCountWaited); @@ -157,7 +156,7 @@ HWTEST_F(FinishTest, givenL3FlushDeferredIfNeededAndL3FlushAfterPostSyncEnabledW MockContext contextWithMockCmdQ(pClDevice, true); MockCommandQueueHw cmdQ(&contextWithMockCmdQ, pClDevice, 0); - cmdQ.setL3FlushDeferredIfNeeded(true); + cmdQ.setPendingL3FlushForHostVisibleResources(true); cmdQ.l3FlushAfterPostSyncEnabled = true; size_t offset = 0; diff --git a/opencl/test/unit_test/command_queue/get_size_required_tests.cpp b/opencl/test/unit_test/command_queue/get_size_required_tests.cpp index 1be195cd15..26588269bd 100644 --- a/opencl/test/unit_test/command_queue/get_size_required_tests.cpp +++ b/opencl/test/unit_test/command_queue/get_size_required_tests.cpp @@ -43,7 +43,7 @@ HWTEST_F(GetSizeRequiredTest, WhenFinishingThenHeapsAndCommandBufferAreNotConsum auto &commandStream = pCmdQ->getCS(1024); auto usedBeforeCS = commandStream.getUsed(); - auto retVal = pCmdQ->finish(); + auto retVal = pCmdQ->finish(false); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(0u, commandStream.getUsed() - usedBeforeCS); diff --git a/opencl/test/unit_test/command_queue/oom_tests.cpp b/opencl/test/unit_test/command_queue/oom_tests.cpp index ccc2ee5c62..c1f3a062b9 100644 --- a/opencl/test/unit_test/command_queue/oom_tests.cpp +++ b/opencl/test/unit_test/command_queue/oom_tests.cpp @@ -75,7 +75,7 @@ HWTEST_P(OOMCommandQueueTest, WhenFinishingThenMaxAvailableSpaceIsNotExceeded) { auto usedBeforeCS = commandStream.getUsed(); auto usedBeforeISH = indirectHeap.getUsed(); - auto retVal = pCmdQ->finish(); + auto retVal = pCmdQ->finish(false); auto usedAfterCS = commandStream.getUsed(); auto usedAfterISH = indirectHeap.getUsed(); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index 9aa97b7fd9..9bf112bc8e 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -181,9 +181,9 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEmptyQueueWhenFinishingThenTa commandStreamReceiver.taskCount = taskCount; EXPECT_EQ(commandStreamReceiver.heaplessStateInitialized ? 1u : 0u, commandStreamReceiver.peekLatestSentTaskCount()); - mockCmdQueue.finish(); + mockCmdQueue.finish(false); EXPECT_EQ(commandStreamReceiver.heaplessStateInitialized ? 1u : 0u, commandStreamReceiver.peekLatestSentTaskCount()); - mockCmdQueue.finish(); + mockCmdQueue.finish(false); // nothings sent to the HW, no need to bump tags EXPECT_EQ(commandStreamReceiver.heaplessStateInitialized ? 1u : 0u, commandStreamReceiver.peekLatestSentTaskCount()); EXPECT_EQ(0u, mockCmdQueue.latestTaskCountWaited); @@ -222,13 +222,13 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenNonDcFlushWithInitialTaskCoun // finish after enqueued kernel(cmdq task count = 1) mockCmdQueue.enqueueKernel(kernel, 1, nullptr, &gws, nullptr, 0, nullptr, nullptr); - mockCmdQueue.finish(); + mockCmdQueue.finish(false); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); EXPECT_EQ(1u, mockCmdQueue.latestTaskCountWaited); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); // finish again - dont call flush task - mockCmdQueue.finish(); + mockCmdQueue.finish(false); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); EXPECT_EQ(1u, mockCmdQueue.latestTaskCountWaited); EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount()); @@ -263,12 +263,12 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenDcFlushWhenFinishingThenTaskC EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); // cmdQ task count = 2, finish again - mockCmdQueue.finish(); + mockCmdQueue.finish(false); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); // finish again - dont flush task again - mockCmdQueue.finish(); + mockCmdQueue.finish(false); EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount()); @@ -370,7 +370,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenNonBlockingMapEnqueueWhenFini EXPECT_EQ(expectedTaskCount, commandStreamReceiver.peekLatestSentTaskCount()); - commandQueue.finish(); + commandQueue.finish(false); EXPECT_EQ(expectedTaskCount, commandStreamReceiver.peekLatestSentTaskCount()); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp index 8a1f99f729..5c8dfbe162 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp @@ -423,8 +423,8 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW EXPECT_EQ(0u, semaphores.size()); } userEvent1.setStatus(CL_COMPLETE); - pCmdQ1->finish(); - pCmdQ2->finish(); + pCmdQ1->finish(false); + pCmdQ2->finish(false); { HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ1->getGpgpuCommandStreamReceiver().getCS(0)); @@ -510,7 +510,7 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW userEvent1.setStatus(CL_COMPLETE); event1->release(); event2->release(); - pCmdQ1->finish(); + pCmdQ1->finish(false); { HardwareParse csHwParser; csHwParser.parseCommands(pCmdQ1->getGpgpuCommandStreamReceiver().getCS(0)); @@ -610,8 +610,8 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW EXPECT_EQ(0u, semaphores.size()); } userEvent1.setStatus(CL_COMPLETE); - pCmdQ1->finish(); - pCmdQ2->finish(); + pCmdQ1->finish(false); + pCmdQ2->finish(false); { HardwareParse csHwParser; @@ -690,8 +690,8 @@ HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenUnflushedQueueAndEventI EXPECT_TRUE(pCmdQ1->getGpgpuCommandStreamReceiver().isLatestTaskCountFlushed()); castToObject(inputEvent)->release(); castToObject(outputEvent)->release(); - pCmdQ1->finish(); - pCmdQ2->finish(); + pCmdQ1->finish(false); + pCmdQ2->finish(false); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenStaticPartitioningEnabledWhenFlushingTaskThenWorkPartitionAllocationIsMadeResident) { @@ -987,7 +987,7 @@ HWTEST_F(BcsCrossDeviceMigrationTests, givenBufferWithMultiStorageWhenEnqueueRea retVal = cmdQueue->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, size, hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); - cmdQueue->finish(); + cmdQueue->finish(false); EXPECT_TRUE(cmdQueue->migrateMultiGraphicsAllocationsIfRequiredCalled); diff --git a/opencl/test/unit_test/d3d_sharing/d3d_tests_part2.cpp b/opencl/test/unit_test/d3d_sharing/d3d_tests_part2.cpp index 891a6c2fd0..fa515d449d 100644 --- a/opencl/test/unit_test/d3d_sharing/d3d_tests_part2.cpp +++ b/opencl/test/unit_test/d3d_sharing/d3d_tests_part2.cpp @@ -36,7 +36,7 @@ TYPED_TEST_P(D3DTests, givenSharedResourceBufferAndInteropUserSyncEnabledWhenRel class MockCmdQ : public MockCommandQueue { public: MockCmdQ(Context *context, ClDevice *device, const cl_queue_properties *properties) : MockCommandQueue(context, device, properties, false){}; - cl_int finish() override { + cl_int finish(bool resolvePendingL3Flushes) override { finishCalled++; return CL_SUCCESS; } @@ -66,7 +66,7 @@ TYPED_TEST_P(D3DTests, givenNonSharedResourceBufferAndInteropUserSyncDisabledWhe class MockCmdQ : public MockCommandQueue { public: MockCmdQ(Context *context, ClDevice *device, const cl_queue_properties *properties) : MockCommandQueue(context, device, properties, false){}; - cl_int finish() override { + cl_int finish(bool resolvePendingL3Flushes) override { finishCalled++; return CL_SUCCESS; } @@ -98,7 +98,7 @@ TYPED_TEST_P(D3DTests, givenSharedResourceBufferAndInteropUserSyncDisabledWhenRe class MockCmdQ : public MockCommandQueue { public: MockCmdQ(Context *context, ClDevice *device, const cl_queue_properties *properties) : MockCommandQueue(context, device, properties, false){}; - cl_int finish() override { + cl_int finish(bool resolvePendingL3Flushes) override { finishCalled++; return CL_SUCCESS; } @@ -128,7 +128,7 @@ TYPED_TEST_P(D3DTests, givenNonSharedResourceBufferAndInteropUserSyncEnabledWhen class MockCmdQ : public MockCommandQueue { public: MockCmdQ(Context *context, ClDevice *device, const cl_queue_properties *properties) : MockCommandQueue(context, device, properties, false){}; - cl_int finish() override { + cl_int finish(bool resolvePendingL3Flushes) override { finishCalled++; return CL_SUCCESS; } diff --git a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp index c6733ae560..7c580cc46f 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp @@ -633,7 +633,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThe cmdQ->flush(); EXPECT_EQ(2u, deferredTimestampPackets->peekNodes().size()); - cmdQ->finish(); + cmdQ->finish(false); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); } @@ -664,7 +664,7 @@ HWTEST_F(TimestampPacketTests, givenWaitlistWithTimestampPacketWhenEnqueueingThe ASSERT_GT(deferredTimestampPackets->peekNodes().size(), 0u); EXPECT_EQ(timestamp.peekNodes()[0]->getGpuAddress(), deferredTimestampPackets->peekNodes()[0]->getGpuAddress()); - cmdQ->finish(); + cmdQ->finish(false); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); } @@ -724,7 +724,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampWaitEnabledWhenEnqueueWithEventThen EXPECT_TRUE(csr.downloadAllocationCalled); csr.downloadAllocationCalled = false; - cmdQ->finish(); + cmdQ->finish(false); EXPECT_TRUE(event1.isCompleted()); EXPECT_TRUE(event2.isCompleted()); @@ -799,7 +799,7 @@ HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitForQueuesWhenFinishWithou EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(0u, timestampPacketContainer->peekNodes().size()); - cmdQ->finish(); + cmdQ->finish(false); EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 1u); } @@ -830,7 +830,7 @@ HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitForQueuesWhenFinishThenWa timestampPacketContainer->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData); } - cmdQ->finish(); + cmdQ->finish(false); EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 0u); } @@ -856,7 +856,7 @@ HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitForQueuesWhenFinish EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(0u, timestampPacketContainer->peekNodes().size()); - cmdQ->finish(); + cmdQ->finish(false); EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 1u); @@ -890,7 +890,7 @@ HWTEST_F(TimestampPacketTests, givenOOQAndWithoutEventWhenEnqueueCalledThenMoveC EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(0u, timestampPacketContainer->peekNodes().size()); - cmdQ->finish(); + cmdQ->finish(false); clReleaseEvent(event); @@ -911,7 +911,7 @@ HWTEST_F(TimestampPacketTests, whenReleaseEventThenWait) { cl_event event; cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &event); - cmdQ->finish(); + cmdQ->finish(false); EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 1u); clReleaseEvent(event); @@ -1058,7 +1058,7 @@ HWTEST_F(TimestampPacketTests, givenNewSubmissionWhileWaitingThenDontReleaseDefe auto tagAddress = csr.getTagAddress(); *tagAddress = cmdQ->getHeaplessStateInitEnabled() ? 3 : 2; - cmdQ->finish(); + cmdQ->finish(false); EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size()); @@ -1110,7 +1110,7 @@ HWTEST_F(TimestampPacketTests, givenNewBcsSubmissionWhileWaitingThenDontReleaseD auto tagAddress = csr.getTagAddress(); *tagAddress = cmdQ->getHeaplessStateInitEnabled() ? 3 : 2; - cmdQ->finish(); + cmdQ->finish(false); EXPECT_EQ(cmdQ->getHeaplessStateInitEnabled() ? 4u : 3u, cmdQ->bcsStates[0].taskCount); @@ -1171,7 +1171,7 @@ HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitForQueuesWhenFinishThenCa CpuIntrinsicsTests::pauseCounter = 0u; EXPECT_FALSE(device->getUltCommandStreamReceiver().downloadAllocationCalled); - cmdQ->finish(); + cmdQ->finish(false); EXPECT_EQ(1u, CpuIntrinsicsTests::pauseCounter); EXPECT_TRUE(device->getUltCommandStreamReceiver().downloadAllocationCalled); diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index 0ed0c77235..49337eb3bf 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -1554,7 +1554,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockedEnqueueWhenUsingBcsThenWaitForVal userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); - cmdQ->finish(); + cmdQ->finish(false); EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); } diff --git a/opencl/test/unit_test/mem_obj/image_release_mapped_ptr_tests.cpp b/opencl/test/unit_test/mem_obj/image_release_mapped_ptr_tests.cpp index bd307c5afb..1096ef657a 100644 --- a/opencl/test/unit_test/mem_obj/image_release_mapped_ptr_tests.cpp +++ b/opencl/test/unit_test/mem_obj/image_release_mapped_ptr_tests.cpp @@ -40,7 +40,7 @@ class MyMockCommandQueue : public CommandQueueHw { enqueueWriteImageCalled++; return CL_SUCCESS; } - cl_int finish() override { + cl_int finish(bool resolvePendingL3Flushes) override { finishCalled++; return CL_SUCCESS; } diff --git a/opencl/test/unit_test/memory_manager/cpu_page_fault_manager_memory_sync_tests.cpp b/opencl/test/unit_test/memory_manager/cpu_page_fault_manager_memory_sync_tests.cpp index a9cc37bdbd..e3de01ecc6 100644 --- a/opencl/test/unit_test/memory_manager/cpu_page_fault_manager_memory_sync_tests.cpp +++ b/opencl/test/unit_test/memory_manager/cpu_page_fault_manager_memory_sync_tests.cpp @@ -35,7 +35,7 @@ struct CommandQueueMock : public MockCommandQueue { passedMapFlags = mapFlags; return CL_SUCCESS; } - cl_int finish() override { + cl_int finish(bool resolvePendingL3Flushes) override { finishCalled++; return CL_SUCCESS; } diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index 550eabe3ae..0dda9f9cd5 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -226,13 +226,16 @@ class MockCommandQueue : public CommandQueue { cl_int enqueueResourceBarrier(BarrierCommand *resourceBarrier, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override { return CL_SUCCESS; } - cl_int finish() override { + cl_int finish(bool resolvePendingL3Flushes) override { ++finishCalledCount; return CL_SUCCESS; } cl_int flush() override { return CL_SUCCESS; } + void programPendingL3Flushes(CommandStreamReceiver &csr, bool &waitForTaskCountRequired, bool resolvePendingL3Flushes) override { + } + bool waitForTimestamps(std::span copyEnginesToWait, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override { waitForTimestampsCalled = true; return false; @@ -521,9 +524,9 @@ class MockCommandQueueHw : public CommandQueueHw { return BaseClass::enqueueSVMMemcpy(blockingCopy, dstPtr, srcPtr, size, numEventsInWaitList, eventWaitList, event, csrParam); } - cl_int finish() override { + cl_int finish(bool resolvePendingL3Flushes) override { finishCalledCount++; - return BaseClass::finish(); + return BaseClass::finish(resolvePendingL3Flushes); } LinearStream *peekCommandStream() { diff --git a/opencl/test/unit_test/mt_tests/command_queue/enqueue_kernel_mt_tests.cpp b/opencl/test/unit_test/mt_tests/command_queue/enqueue_kernel_mt_tests.cpp index 6090aa6f7b..0bb042a81f 100644 --- a/opencl/test/unit_test/mt_tests/command_queue/enqueue_kernel_mt_tests.cpp +++ b/opencl/test/unit_test/mt_tests/command_queue/enqueue_kernel_mt_tests.cpp @@ -80,7 +80,7 @@ HWTEST_TEMPLATED_F(EnqueueKernelTestWithMockCsrHw2, givenCsrInBatchingModeWhenFi thread.join(); } - pCmdQ->finish(); + pCmdQ->finish(false); EXPECT_GE(mockCsr->flushCalledCount, 1u); diff --git a/opencl/test/unit_test/profiling/profiling_tests.cpp b/opencl/test/unit_test/profiling/profiling_tests.cpp index 33767a1e44..b338bbae58 100644 --- a/opencl/test/unit_test/profiling/profiling_tests.cpp +++ b/opencl/test/unit_test/profiling/profiling_tests.cpp @@ -467,7 +467,7 @@ HWTEST_F(ProfilingTests, givenBarrierEnqueueWhenNonBlockedEnqueueThenSetGpuPath) pCmdQ->enqueueBarrierWithWaitList(0, nullptr, &event); auto eventObj = static_cast(event); EXPECT_FALSE(eventObj->isCPUProfilingPath()); - pCmdQ->finish(); + pCmdQ->finish(false); uint64_t queued, submit; cl_int retVal; @@ -488,7 +488,7 @@ HWTEST_F(ProfilingTests, givenMarkerEnqueueWhenNonBlockedEnqueueThenSetGpuPath) pCmdQ->enqueueMarkerWithWaitList(0, nullptr, &event); auto eventObj = static_cast(event); EXPECT_FALSE(eventObj->isCPUProfilingPath()); - pCmdQ->finish(); + pCmdQ->finish(false); uint64_t queued, submit; cl_int retVal; @@ -559,7 +559,7 @@ HWTEST_F(ProfilingTests, givenNonKernelEnqueueWhenNonBlockedEnqueueThenSetCpuPat &event); auto eventObj = static_cast(event); EXPECT_TRUE(eventObj->isCPUProfilingPath() == CL_TRUE); - pCmdQ->finish(); + pCmdQ->finish(false); uint64_t queued, submit, start, end; diff --git a/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp b/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp index 9f79165be0..7eb91bcef9 100644 --- a/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp +++ b/opencl/test/unit_test/scenarios/windows/enqueue_read_write_buffer_scenarios_windows_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2023 Intel Corporation + * Copyright (C) 2019-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -106,7 +106,7 @@ HWTEST_F(EnqueueBufferWindowsTest, givenMisalignedHostPtrWhenEnqueueReadBufferCa ASSERT_NE(nullptr, hostPtrAllocation); uint64_t gpuVa = hostPtrAllocation->getGpuAddress(); - cmdQ->finish(); + cmdQ->finish(false); parseCommands(*cmdQ); auto &kernelInfo = kernel->getKernelInfo(); diff --git a/opencl/test/unit_test/sharings/va/va_sharing_tests.cpp b/opencl/test/unit_test/sharings/va/va_sharing_tests.cpp index 8bd132a59b..9f4551b95e 100644 --- a/opencl/test/unit_test/sharings/va/va_sharing_tests.cpp +++ b/opencl/test/unit_test/sharings/va/va_sharing_tests.cpp @@ -1418,7 +1418,7 @@ TEST_F(VaSharingTests, givenInteropUserSyncIsNotSpecifiedDuringContextCreationWh MockCommandQueueToTestFinish(Context *context, ClDevice *device, const cl_queue_properties *props) : MockCommandQueue(context, device, props, false) { } - cl_int finish() override { + cl_int finish(bool resolvePendingL3Flushes) override { finishCalled++; return CL_SUCCESS; }