diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index 0b8a7f17a0..629c9e67bd 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -5760,6 +5760,13 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueVerifyMemoryINTEL(cl_command_queue comm return retVal; } + auto activeCopyEngines = pCommandQueue->peekActiveBcsStates(); + for (auto &copyEngine : activeCopyEngines) { + if (copyEngine.isValid()) { + pCommandQueue->getBcsCommandStreamReceiver(copyEngine.engineType)->pollForCompletion(); + } + } + auto &csr = pCommandQueue->getGpgpuCommandStreamReceiver(); auto status = csr.expectMemory(allocationPtr, expectedData, sizeOfComparison, comparisonMode); return status ? CL_SUCCESS : CL_INVALID_VALUE; diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index 4c75789551..a24a876cdb 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -368,6 +368,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> { bool isTextureCacheFlushNeeded(uint32_t commandType) const; + const std::array &peekActiveBcsStates() const { return bcsStates; } + protected: void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet); cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest); diff --git a/opencl/test/unit_test/api/cl_enqueue_verify_memory.inl b/opencl/test/unit_test/api/cl_enqueue_verify_memory.inl index c9f8577ca5..3178d7875b 100644 --- a/opencl/test/unit_test/api/cl_enqueue_verify_memory.inl +++ b/opencl/test/unit_test/api/cl_enqueue_verify_memory.inl @@ -1,11 +1,12 @@ /* - * Copyright (C) 2019-2022 Intel Corporation + * Copyright (C) 2019-2023 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/common/mocks/mock_csr.h" +#include "shared/test/common/mocks/mock_os_context.h" #include "shared/test/common/test_macros/test.h" 
#include "opencl/extensions/public/cl_ext_private.h" @@ -64,3 +65,38 @@ TEST_F(ClEnqueueVerifyMemoryIntelTests, givenNotEqualMemoryWhenCallingVerifyMemo cl_int retval = clEnqueueVerifyMemoryINTEL(pCommandQueue, gpuAddress, &differentMemory, sizeof(differentMemory), comparisonMode); EXPECT_EQ(CL_INVALID_VALUE, retval); } + +HWTEST_F(ClEnqueueVerifyMemoryIntelTests, givenActiveBcsEngineWhenCallingExpectMemoryThenPollForCompletionOnAllEngines) { + UltCommandStreamReceiver ultCsrBcs0(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + UltCommandStreamReceiver ultCsrBcs1(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + + MockOsContext osContext0(0, {{aub_stream::ENGINE_BCS, EngineUsage::Regular}, 0, PreemptionMode::Disabled, false, false}); + MockOsContext osContext1(1, {{aub_stream::ENGINE_BCS1, EngineUsage::Regular}, 0, PreemptionMode::Disabled, false, false}); + + EngineControl engineControl[2] = {{&ultCsrBcs0, &osContext0}, {&ultCsrBcs1, &osContext1}}; + + pCommandQueue->bcsInitialized = true; + pCommandQueue->bcsEngines[EngineHelpers::getBcsIndex(aub_stream::ENGINE_BCS)] = &engineControl[0]; + pCommandQueue->bcsEngines[EngineHelpers::getBcsIndex(aub_stream::ENGINE_BCS1)] = &engineControl[1]; + + pCommandQueue->bcsStates[0].engineType = aub_stream::ENGINE_BCS; + pCommandQueue->bcsStates[1].engineType = aub_stream::ENGINE_BCS1; + + EXPECT_EQ(0u, ultCsrBcs0.pollForCompletionCalled); + EXPECT_EQ(0u, ultCsrBcs1.pollForCompletionCalled); + + cl_int retval = clEnqueueVerifyMemoryINTEL(pCommandQueue, gpuAddress, expected, expectedSize, comparisonMode); + EXPECT_EQ(CL_SUCCESS, retval); + + EXPECT_EQ(1u, ultCsrBcs0.pollForCompletionCalled); + EXPECT_EQ(1u, ultCsrBcs1.pollForCompletionCalled); + + pCommandQueue->bcsEngines[EngineHelpers::getBcsIndex(aub_stream::ENGINE_BCS)]->commandStreamReceiver = nullptr; + 
pCommandQueue->bcsEngines[EngineHelpers::getBcsIndex(aub_stream::ENGINE_BCS1)]->commandStreamReceiver = nullptr; + + pCommandQueue->bcsStates[0].engineType = aub_stream::NUM_ENGINES; + pCommandQueue->bcsStates[1].engineType = aub_stream::NUM_ENGINES; + + pCommandQueue->bcsEngines[EngineHelpers::getBcsIndex(aub_stream::ENGINE_BCS)] = nullptr; + pCommandQueue->bcsEngines[EngineHelpers::getBcsIndex(aub_stream::ENGINE_BCS1)] = nullptr; +} diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index 9f103a4df9..528038605a 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -24,6 +24,8 @@ class MockCommandQueue : public CommandQueue { public: using CommandQueue::bcsEngines; using CommandQueue::bcsEngineTypes; + using CommandQueue::bcsInitialized; + using CommandQueue::bcsStates; using CommandQueue::bcsTimestampPacketContainers; using CommandQueue::blitEnqueueAllowed; using CommandQueue::blitEnqueueImageAllowed; diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index a3c700df9d..96ae90e668 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -368,6 +368,10 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ return commandStreamReceiverType; } + void pollForCompletion() override { + pollForCompletionCalled++; + } + std::vector aubCommentMessages; BatchBuffer latestFlushedBatchBuffer = {}; @@ -385,6 +389,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ uint32_t createPerDssBackedBufferCalled = 0; uint32_t initDirectSubmissionCalled = 0; uint32_t fillReusableAllocationsListCalled = 0; + uint32_t pollForCompletionCalled = 0; int ensureCommandBufferAllocationCalled = 0; DispatchFlags recordedDispatchFlags; BlitPropertiesContainer receivedBlitProperties 
= {};