diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp index 5a320b94ad..e0e3eb4f3d 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp @@ -1138,7 +1138,9 @@ struct BlitEnqueueFlushTests : public BlitEnqueueTests<1> { using UltCommandStreamReceiver::UltCommandStreamReceiver; SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { - latestFlushedCounter = ++(*flushCounter); + if (flushCounter) { + latestFlushedCounter = ++(*flushCounter); + } return UltCommandStreamReceiver::flush(batchBuffer, allocationsForResidency); } diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index f1bf58fc81..6ad550bff5 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -885,7 +885,7 @@ HWTEST_F(CommandQueueTests, givenMultipleCommandQueuesWhenMarkerIsEmittedThenGra auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); MockContext context(device.get()); std::unique_ptr commandQ(new MockCommandQueue(&context, device.get(), 0, false)); - *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = 0; + *device->getDefaultEngine().commandStreamReceiver->getTagAddress() = commandQ->getHeaplessStateInitEnabled() ? 1 : 0; commandQ->enqueueMarkerWithWaitList(0, nullptr, nullptr); commandQ->enqueueMarkerWithWaitList(0, nullptr, nullptr); @@ -2984,8 +2984,16 @@ HWTEST_F(CommandQueueOnSpecificEngineTests, givenNotInitializedCcsOsContextWhenC MockContext context{}; cl_command_queue_properties properties[5] = {}; + auto &compilerProductHelper = context.getDevice(0)->getCompilerProductHelper(); + auto heaplessModeEnabled = compilerProductHelper.isHeaplessModeEnabled(); + auto heaplessStateInit = compilerProductHelper.isHeaplessStateInitEnabled(heaplessModeEnabled); + OsContext &osContext = *context.getDevice(0)->getEngine(aub_stream::ENGINE_CCS, EngineUsage::regular).osContext; - EXPECT_FALSE(osContext.isInitialized()); + if (heaplessStateInit) { + EXPECT_TRUE(osContext.isInitialized()); + } else { + EXPECT_FALSE(osContext.isInitialized()); + } debugManager.flags.NodeOrdinal.set(static_cast(aub_stream::EngineType::ENGINE_CCS)); const auto rcsFamilyIndex = static_cast(context.getDevice(0)->getDevice().getEngineGroupIndexFromEngineGroupType(EngineGroupType::renderCompute)); diff --git a/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp index 9964c04170..fcb3ed5410 100644 --- a/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_fill_buffer_tests.cpp @@ -582,7 +582,7 @@ HWTEST_F(EnqueueFillBufferCmdTests, givenEnqueueFillBufferWhenPatternAllocationI ASSERT_FALSE(csr.getAllocationsForReuse().peekIsEmpty()); - GraphicsAllocation *patternAllocation = csr.getAllocationsForReuse().peekHead(); + GraphicsAllocation *patternAllocation = pCmdQ->getHeaplessStateInitEnabled() ? csr.getAllocationsForReuse().peekTail() : csr.getAllocationsForReuse().peekHead(); ASSERT_NE(nullptr, patternAllocation); EXPECT_EQ(AllocationType::fillPattern, patternAllocation->getAllocationType()); diff --git a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp index ec75388359..4787c347e7 100644 --- a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp @@ -115,6 +115,7 @@ struct EnqueueHandlerWithAubSubCaptureTests : public EnqueueHandlerTest { MockCmdQWithAubSubCapture(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false) {} WaitStatus waitUntilComplete(TaskCountType gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { + waitUntilCompleteCalled = true; return CommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait); } @@ -133,6 +134,7 @@ HWTEST_F(EnqueueHandlerWithAubSubCaptureTests, givenEnqueueHandlerWithAubSubCapt DebugManagerStateRestore stateRestore; debugManager.flags.AUBDumpSubCaptureMode.set(1); + UnitTestSetter::disableHeaplessStateInit(stateRestore); auto aubCsr = new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(aubCsr); @@ -145,6 +147,7 @@ HWTEST_F(EnqueueHandlerWithAubSubCaptureTests, givenEnqueueHandlerWithAubSubCapt MockCmdQWithAubSubCapture cmdQ(context, pClDevice); MockKernelWithInternals mockKernel(*pClDevice); size_t gws[3] = {1, 0, 0}; + cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_TRUE(cmdQ.waitUntilCompleteCalled); @@ -174,6 +177,8 @@ HWTEST_F(EnqueueHandlerWithAubSubCaptureTests, givenEnqueueHandlerWithAubSubCapt debugManager.flags.AUBDumpSubCaptureMode.set(1); debugManager.flags.EnableTimestampPacket.set(true); + UnitTestSetter::disableHeaplessStateInit(stateRestore); + auto aubCsr = new MockAubCsr("", true, *pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(aubCsr); @@ -207,6 +212,7 @@ HWTEST_F(EnqueueHandlerWithAubSubCaptureTests, givenInputEventsWhenDispatchingEn DebugManagerStateRestore stateRestore; debugManager.flags.AUBDumpSubCaptureMode.set(1); debugManager.flags.EnableTimestampPacket.set(true); + UnitTestSetter::disableHeaplessStateInit(stateRestore); auto defaultEngine = defaultHwInfo->capabilityTable.defaultEngineType; @@ -497,6 +503,9 @@ HWTEST2_F(EnqueueHandlerTest, givenEnqueueHandlerWhenAddPatchInfoCommentsForAUBD MockKernelWithInternals mockKernel(*pClDevice); auto mockCmdQ = std::unique_ptr>(new MockCommandQueueHw(context, pClDevice, 0)); + if (mockCmdQ->getHeaplessModeEnabled()) { + GTEST_SKIP(); + } size_t gws[] = {1, 1, 1}; diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index 19fd39b6f1..7e57c9168e 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -1040,7 +1040,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueKernelTest, givenTwoEnqueueProgrammedWithinS hwParse.parseCommands(*pCmdQ); auto bbsCommands = findAll(hwParse.cmdList.begin(), hwParse.cmdList.end()); - EXPECT_EQ(bbsCommands.size(), 1u); + EXPECT_EQ(pCmdQ->getHeaplessStateInitEnabled() ? 0u : 1u, bbsCommands.size()); } HWTEST_F(EnqueueKernelTest, givenCsrInBatchingModeWhenFinishIsCalledThenBatchesSubmissionsAreFlushed) { @@ -2040,14 +2040,14 @@ HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenDispatchWalkersThenInse findLoadRegImms(hwParser.cmdList); - EXPECT_EQ(0u, loadRegImmsFound); + EXPECT_EQ(pCmdQ->getHeaplessStateInitEnabled() ? 1u : 0u, loadRegImmsFound); pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); hwParser.parseCommands(*pCmdQ); findLoadRegImms(hwParser.cmdList); - EXPECT_EQ(1u, loadRegImmsFound); + EXPECT_EQ(pCmdQ->getHeaplessStateInitEnabled() ? 2u : 1u, loadRegImmsFound); } HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenDispatcMultiplehWalkersThenInsertLoadRegisterImmCommandOnlyOnce) { diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_two_walker_ioq_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_two_walker_ioq_tests.cpp index 9772811232..a5f972c4d7 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_two_walker_ioq_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_two_walker_ioq_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -63,5 +63,5 @@ HWTEST_F(IOQWithTwoWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenOnePipeC EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation()); // The PC address should match the CS tag address EXPECT_EQ(commandStreamReceiver.getTagAllocation()->getGpuAddress(), NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); - EXPECT_EQ(1u, pipeControl->getImmediateData()); + EXPECT_EQ(commandStreamReceiver.heaplessStateInitialized ? 2u : 1u, pipeControl->getImmediateData()); } diff --git a/opencl/test/unit_test/command_queue/enqueue_marker_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_marker_tests.cpp index 511d27aef3..38ecdfafc8 100644 --- a/opencl/test/unit_test/command_queue/enqueue_marker_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_marker_tests.cpp @@ -22,7 +22,7 @@ using namespace NEO; using MarkerTest = Test; HWTEST_F(MarkerTest, GivenCsrAndCmdqWithSameTaskLevelWhenEnqueingMarkerThenPipeControlIsAdded) { - typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); // Set task levels to known values. @@ -51,7 +51,12 @@ HWTEST_F(MarkerTest, GivenCsrAndCmdqWithSameTaskLevelWhenEnqueingMarkerThenPipeC // If CSR == CQ then a PC is required. auto itorCmd = reverseFind(cmdList.rbegin(), cmdList.rend()); - EXPECT_EQ(cmdList.rend(), itorCmd); + + if (mockCmdQ->getHeaplessStateInitEnabled()) { + EXPECT_NE(cmdList.rend(), itorCmd); + } else { + EXPECT_EQ(cmdList.rend(), itorCmd); + } } HWTEST_F(MarkerTest, GivenCsrAndCmdqWithDifferentTaskLevelsWhenEnqueingMarkerThenPipeControlIsNotAdded) { @@ -241,7 +246,8 @@ TEST_F(MarkerTest, givenMultipleEventsAndCompletedUserEventWhenTheyArePassedToMa &userEvent}; cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]); cl_event event = nullptr; - auto initialTaskCount = pCmdQ->taskCount; + + auto initialTaskCount = std::max(pCmdQ->getGpgpuCommandStreamReceiver().peekTaskCount(), pCmdQ->taskCount); pCmdQ->enqueueMarkerWithWaitList( numEventsInWaitList, diff --git a/opencl/test/unit_test/command_queue/enqueue_resource_barier_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_resource_barier_tests.cpp index 5e3d436838..c96ae3f79a 100644 --- a/opencl/test/unit_test/command_queue/enqueue_resource_barier_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_resource_barier_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2023 Intel Corporation + * Copyright (C) 2019-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -53,7 +53,8 @@ HWTEST_F(ResourceBarrierTest, whenEnqueueResourceBarrierCalledThenUpdateQueueCom BarrierCommand barrierCommand(pCmdQ, &descriptor, 1); - auto previousTaskCount = pCmdQ->taskCount; + auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); + auto previousTaskCount = std::max(pCmdQ->taskCount, csr.peekTaskCount()); auto previousTaskLevel = pCmdQ->taskLevel; const auto enqueueResult = pCmdQ->enqueueResourceBarrier(&barrierCommand, 0, nullptr, nullptr); diff --git a/opencl/test/unit_test/command_queue/enqueue_thread_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_thread_tests.cpp index 9f39d38c2d..62cb6aa51c 100644 --- a/opencl/test/unit_test/command_queue/enqueue_thread_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_thread_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -8,6 +8,7 @@ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/submission_status.h" #include "shared/source/helpers/aligned_memory.h" +#include "shared/source/helpers/compiler_product_helper.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/test_macros/hw_test.h" @@ -32,20 +33,27 @@ class CommandStreamReceiverMock : public UltCommandStreamReceiver { std::vector toFree; // pointers to be freed on destruction Device *pDevice; ClDevice *pClDevice; + bool heaplessStateInit = false; public: size_t expectedToFreeCount = (size_t)-1; CommandStreamReceiverMock(Device *pDevice) : UltCommandStreamReceiver(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()) { this->pDevice = pDevice; this->pClDevice = pDevice->getSpecializedDevice(); + auto &compilerProductHelper = pDevice->getCompilerProductHelper(); + auto heapless = compilerProductHelper.isHeaplessModeEnabled(); + this->heaplessStateInit = compilerProductHelper.isHeaplessStateInitEnabled(heapless); } SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { + EXPECT_NE(nullptr, batchBuffer.commandBufferAllocation->getUnderlyingBuffer()); - toFree.push_back(batchBuffer.commandBufferAllocation); - batchBuffer.stream->replaceBuffer(nullptr, 0); - batchBuffer.stream->replaceGraphicsAllocation(nullptr); + if (!heaplessStateInit) { + toFree.push_back(batchBuffer.commandBufferAllocation); + batchBuffer.stream->replaceBuffer(nullptr, 0); + batchBuffer.stream->replaceGraphicsAllocation(nullptr); + } EXPECT_TRUE(this->ownershipMutex.try_lock()); this->ownershipMutex.unlock(); @@ -54,10 +62,13 @@ class CommandStreamReceiverMock : public UltCommandStreamReceiver { ~CommandStreamReceiverMock() override { EXPECT_FALSE(pClDevice->hasOwnership()); - if (expectedToFreeCount == (size_t)-1) { - EXPECT_GT(toFree.size(), 0u); // make sure flush was called - } else { - EXPECT_EQ(toFree.size(), expectedToFreeCount); + + if (!heaplessStateInit) { + if (expectedToFreeCount == (size_t)-1) { + EXPECT_GT(toFree.size(), 0u); // make sure flush was called + } else { + EXPECT_EQ(toFree.size(), expectedToFreeCount); + } } auto memoryManager = this->getMemoryManager(); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp index 75274337e1..317880f0e0 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp @@ -43,6 +43,10 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenForceCsrReprogrammingDebugVar DebugManagerStateRestore restore; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } + debugManager.flags.ForceCsrReprogramming.set(true); flushTask(commandStreamReceiver); @@ -449,6 +453,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, whenSamplerCach HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushBeforeThenSendPipecontrol) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushBefore); configureCSRtoNonDirtyState(false); @@ -493,6 +500,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, whenSamplerCach HWTEST_F(CommandStreamReceiverFlushTaskTests, whenSamplerCacheFlushAfterThenSendPipecontrol) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } commandStreamReceiver.isPreambleSent = true; commandStreamReceiver.setSamplerCacheFlushRequired(CommandStreamReceiver::SamplerCacheFlushState::samplerCacheFlushAfter); configureCSRtoNonDirtyState(false); @@ -537,6 +547,10 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, WhenFlushingTaskThenCompletionStam HWTEST_F(CommandStreamReceiverFlushTaskTests, WhenFlushingTaskThenStateBaseAddressIsCorrect) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessModeEnabled) { + GTEST_SKIP(); + } + flushTask(commandStreamReceiver); if (!pDevice->getHardwareInfo().capabilityTable.supportsImages) { EXPECT_TRUE(commandStreamReceiver.dshState.updateAndCheck(&dsh)); @@ -581,6 +595,10 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDebugVariableSetWhenProgrammi debugManager.flags.ForceStatelessMocsEncryptionBit.set(1); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } + flushTask(commandStreamReceiver); auto &commandStreamCSR = commandStreamReceiver.commandStream; @@ -668,6 +686,9 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNotApplicableGrfConfigWhenFlu HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenPreambleNotSentWhenFlushingTaskThenPreambleIsSent) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } commandStreamReceiver.isPreambleSent = false; flushTask(commandStreamReceiver); @@ -678,6 +699,10 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenPreambleNotSentWhenFlushingTa HWTEST_F(CommandStreamReceiverFlushTaskTests, givenFlushTaskWhenInitProgrammingFlagsIsCalledThenBindingTableBaseAddressRequiredIsSetCorrecty) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } + commandStreamReceiver.initProgrammingFlags(); EXPECT_TRUE(commandStreamReceiver.bindingTableBaseAddressRequired); @@ -968,6 +993,11 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAn hardwareInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1; auto mockDevice = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo, 0u)); auto &commandStreamReceiver = mockDevice->getUltCommandStreamReceiver(); + + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } + commandStreamReceiver.timestampPacketWriteEnabled = false; // Force a PIPE_CONTROL through a taskLevel transition taskLevel = commandStreamReceiver.peekTaskLevel() + 1; @@ -1033,9 +1063,12 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBothCsWhenFlushingTaskThenFlu } HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBothCsWhenFlushingTaskThenChainWithBatchBufferStart) { - typedef typename FamilyType::MI_BATCH_BUFFER_START MI_BATCH_BUFFER_START; - typedef typename FamilyType::MI_NOOP MI_NOOP; + using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; + using MI_NOOP = typename FamilyType::MI_NOOP; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } // Reserve space for 16 NOOPs commandStream.getSpace(16 * sizeof(MI_NOOP)); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index 79d38a837b..539b928829 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -9,6 +9,7 @@ #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/gen_common/reg_configs_common.h" #include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/source/helpers/compiler_product_helper.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/state_base_address.h" #include "shared/source/memory_manager/internal_allocation_storage.h" @@ -985,6 +986,12 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenForced32BitAllocationsModeSto auto commandStreamReceiver = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); + auto &compilerProductHelper = pDevice->getCompilerProductHelper(); + auto heaplessEnabled = compilerProductHelper.isHeaplessModeEnabled(); + if (compilerProductHelper.isHeaplessStateInitEnabled(heaplessEnabled)) { + GTEST_SKIP(); + } + pDevice->getMemoryManager()->setForce32BitAllocations(true); pDevice->resetCommandStreamReceiver(commandStreamReceiver); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index 37e600c383..b3ba9ba239 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -686,14 +686,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInDefaultModeWhenFlushTask auto &csr = commandQueue.getGpgpuCommandStreamReceiver(); - csr.flushTask(commandStream, - 0, - &dsh, - &ioh, - &ssh, - taskLevel, - dispatchFlags, - *pDevice); + flushTaskMethod(csr, commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(heaplessStateInit ? 2u : 1u, csr.peekLatestSentTaskCount()); EXPECT_EQ(heaplessStateInit ? 2u : 1u, csr.peekLatestFlushedTaskCount()); @@ -715,44 +708,23 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas csr.useGpuIdleImplicitFlush = false; dispatchFlags.implicitFlush = false; - csr.flushTask(commandStream, - 0, - &dsh, - &ioh, - &ssh, - taskLevel, - dispatchFlags, - *pDevice); + flushTaskMethod(csr, commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(csr.heaplessStateInitialized ? 2u : 1u, csr.peekLatestSentTaskCount()); - EXPECT_EQ(csr.heaplessStateInitialized ? 1u : 0u, csr.peekLatestFlushedTaskCount()); + EXPECT_EQ(csr.heaplessStateInitialized ? 2u : 0u, csr.peekLatestFlushedTaskCount()); dispatchFlags.implicitFlush = false; - csr.flushTask(commandStream, - 0, - &dsh, - &ioh, - &ssh, - taskLevel, - dispatchFlags, - *pDevice); + flushTaskMethod(csr, commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(csr.heaplessStateInitialized ? 3u : 2u, csr.peekLatestSentTaskCount()); EXPECT_EQ(2u, csr.peekLatestFlushedTaskCount()); dispatchFlags.implicitFlush = false; - csr.flushTask(commandStream, - 0, - &dsh, - &ioh, - &ssh, - taskLevel, - dispatchFlags, - *pDevice); + flushTaskMethod(csr, commandStream, 0, &dsh, &ioh, &ssh, taskLevel, dispatchFlags, *pDevice); EXPECT_EQ(csr.heaplessStateInitialized ? 4u : 3u, csr.peekLatestSentTaskCount()); - EXPECT_EQ(csr.heaplessStateInitialized ? 3u : 2u, csr.peekLatestFlushedTaskCount()); + EXPECT_EQ(csr.heaplessStateInitialized ? 4u : 2u, csr.peekLatestFlushedTaskCount()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenWaitForTaskCountIsCalledWithTaskCountThatWasNotYetFlushedThenBatchedCommandBuffersAreSubmitted) { @@ -1914,6 +1886,15 @@ class UltCommandStreamReceiverForDispatchFlags : public UltCommandStreamReceiver return BaseClass::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); } + + CompletionStamp flushTaskStateless(LinearStream &commandStream, size_t commandStreamStart, + const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { + savedDispatchFlags = dispatchFlags; + return BaseClass::flushTaskStateless(commandStream, commandStreamStart, + dsh, ioh, ssh, taskLevel, dispatchFlags, device); + } + DispatchFlags savedDispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); }; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp index b88220830d..a067f84e8b 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp @@ -742,6 +742,10 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEnqueueWithoutArbitrationPoli debugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } + commandStreamReceiver.streamProperties.initSupport(pDevice->getRootDeviceEnvironment()); auto &csrThreadArbitrationPolicy = commandStreamReceiver.streamProperties.stateComputeMode.threadArbitrationPolicy.value; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp index 1ef49867de..686d80776c 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp @@ -32,7 +32,9 @@ typedef UltCommandStreamReceiverTest CommandStreamReceiverFlushTaskXeHPAndLaterT HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, whenReprogrammingSshThenBindingTablePoolIsProgrammed) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - + if (commandStreamReceiver.heaplessModeEnabled) { + GTEST_SKIP(); + } flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.getCS(0)); auto bindingTablePoolAlloc = getCommand(); @@ -48,6 +50,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, wh debugManager.flags.DisableCachingForHeaps.set(1); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessModeEnabled) { + GTEST_SKIP(); + } flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.getCS(0)); @@ -61,6 +66,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, wh HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, whenNotReprogrammingSshThenBindingTablePoolIsNotProgrammed) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessModeEnabled) { + GTEST_SKIP(); + } flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.getCS(0)); @@ -159,7 +167,9 @@ HWTEST2_F(CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenSBACommandToProg HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, whenNotReprogrammingSshButInitProgrammingFlagsThenBindingTablePoolIsProgrammed) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - + if (commandStreamReceiver.heaplessModeEnabled) { + GTEST_SKIP(); + } flushTask(commandStreamReceiver); parseCommands(commandStreamReceiver.getCS(0)); auto stateBaseAddress = getCommand(); @@ -221,8 +231,13 @@ HWTEST2_F(CommandStreamReceiverFlushTaskXeHPAndLaterTests, whenFlushAllCachesVar HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenconfigureCSRtoNonDirtyStateWhenFlushTaskIsCalledThenNoCommandsAreAdded) { configureCSRtoNonDirtyState(true); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + + auto sizeUsedBefore = commandStreamReceiver.commandStream.getUsed(); flushTask(commandStreamReceiver); - EXPECT_EQ(commandStreamReceiver.heaplessStateInitialized ? 1u : 0u, commandStreamReceiver.commandStream.getUsed()); + + auto sizeUsedAfter = commandStreamReceiver.commandStream.getUsed(); + + EXPECT_EQ(sizeUsedBefore, sizeUsedAfter); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenCsrInBatchingModeWhenTaskIsSubmittedViaCsrThenBbEndCoversPaddingEnoughToFitMiBatchBufferStart) { @@ -235,6 +250,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi mockCsr.getCS(1024u); auto &csrCommandStream = mockCsr.commandStream; + auto usedBefore = mockCsr.commandStream.getUsed(); + // we do level change that will emit PPC, fill all the space so only BB end fits. taskLevel++; auto ppcSize = MemorySynchronizationCommands::getSizeForSingleBarrier(false); @@ -244,7 +261,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi flushTask(mockCsr); - EXPECT_EQ(expectedUsedSize, mockCsr.commandStream.getUsed()); + auto usedAfter = mockCsr.commandStream.getUsed(); + auto sizeUsed = usedAfter - usedBefore; + EXPECT_EQ(expectedUsedSize, sizeUsed); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, GivenSameTaskLevelThenDontSendPipeControl) { @@ -281,7 +300,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, givenCommandStreamReceiverWithInstructionCacheRequestWhenFlushTaskIsCalledThenPipeControlWithInstructionCacheIsEmitted) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } auto startRecursiveLockCounter = commandStreamReceiver.recursiveLockCounter.load(); configureCSRtoNonDirtyState(true); @@ -307,9 +328,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi commandStreamReceiver.taskLevel = taskLevel; taskLevel++; // submit with higher taskLevel + auto offset = commandStreamReceiver.commandStream.getUsed(); flushTask(commandStreamReceiver); - parseCommands(commandStreamReceiver.commandStream, 0); + parseCommands(commandStreamReceiver.commandStream, offset); auto itorPC = find(cmdList.begin(), cmdList.end()); EXPECT_EQ(cmdList.end(), itorPC); @@ -801,6 +823,9 @@ struct CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests : public Command HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests, givenMultipleStaticActivePartitionsWhenFlushingTaskThenExpectTagUpdatePipeControlWithPartitionFlagOnAndActivePartitionConfig) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } EXPECT_EQ(1u, commandStreamReceiver.activePartitionsConfig); commandStreamReceiver.activePartitions = 2; @@ -822,6 +847,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTile debugManager.flags.UpdateTaskCountFromWait.set(3); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } EXPECT_EQ(1u, commandStreamReceiver.activePartitionsConfig); commandStreamReceiver.activePartitions = 2; @@ -838,7 +866,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTile HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests, givenSingleStaticActivePartitionWhenFlushingTaskThenExpectTagUpdatePipeControlWithoutPartitionFlagOnAndNoActivePartitionConfig) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } commandStreamReceiver.activePartitions = 1; commandStreamReceiver.taskCount = 3; flushTask(commandStreamReceiver, true); @@ -859,6 +889,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTile debugManager.flags.UpdateTaskCountFromWait.set(3); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } EXPECT_EQ(1u, commandStreamReceiver.activePartitionsConfig); commandStreamReceiver.activePartitions = 2; @@ -888,7 +921,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTile HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests, givenMultipleDynamicActivePartitionsWhenFlushingTaskTwiceThenExpectTagUpdatePipeControlWithoutPartitionFlagAndPartitionRegisters) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } commandStreamReceiver.activePartitions = 2; commandStreamReceiver.taskCount = 3; commandStreamReceiver.staticWorkPartitioningEnabled = false; @@ -908,7 +943,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTile debugManager.flags.UpdateTaskCountFromWait.set(1); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } commandStreamReceiver.activePartitions = 2; commandStreamReceiver.taskCount = 3; commandStreamReceiver.staticWorkPartitioningEnabled = false; @@ -921,7 +958,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTile HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests, givenMultipleStaticActivePartitionsAndDirectSubmissionActiveWhenFlushingTaskThenExpectTagUpdatePipeControlWithPartitionFlagOnAndNoActivePartitionConfig) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); - + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } commandStreamReceiver.directSubmission = std::make_unique>>(commandStreamReceiver); commandStreamReceiver.directSubmissionAvailable = true; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index 0e18f851e6..24d9996229 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -1052,8 +1052,14 @@ HWTEST2_F(RelaxedOrderingBcsTests, givenDependenciesWhenFlushingThenProgramCorre // First submission with global state flushBcsTask(&csr, blitProperties, false, *pDevice); - EXPECT_TRUE(csr.latestFlushedBatchBuffer.hasStallingCmds); - EXPECT_FALSE(csr.latestFlushedBatchBuffer.hasRelaxedOrderingDependencies); + + if (csr.heaplessStateInitialized) { + EXPECT_FALSE(csr.latestFlushedBatchBuffer.hasStallingCmds); + EXPECT_TRUE(csr.latestFlushedBatchBuffer.hasRelaxedOrderingDependencies); + } else { + EXPECT_TRUE(csr.latestFlushedBatchBuffer.hasStallingCmds); + EXPECT_FALSE(csr.latestFlushedBatchBuffer.hasRelaxedOrderingDependencies); + } auto cmdsOffset = csr.commandStream.getUsed(); @@ -1136,8 +1142,14 @@ HWTEST2_F(RelaxedOrderingBcsTests, givenTagUpdateWhenFlushingThenDisableRelaxedO // First submission with global state flushBcsTask(&csr, blitProperties, false, *pDevice); - EXPECT_TRUE(csr.latestFlushedBatchBuffer.hasStallingCmds); - EXPECT_FALSE(csr.latestFlushedBatchBuffer.hasRelaxedOrderingDependencies); + + if (csr.heaplessStateInitialized) { + EXPECT_FALSE(csr.latestFlushedBatchBuffer.hasStallingCmds); + EXPECT_TRUE(csr.latestFlushedBatchBuffer.hasRelaxedOrderingDependencies); + } else { + EXPECT_TRUE(csr.latestFlushedBatchBuffer.hasStallingCmds); + EXPECT_FALSE(csr.latestFlushedBatchBuffer.hasRelaxedOrderingDependencies); + } debugManager.flags.UpdateTaskCountFromWait.set(0); @@ -1173,6 +1185,10 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC uint32_t newTaskCount = 19; csr.taskCount = newTaskCount - 1; uint32_t expectedResursiveLockCount = csr.resourcesInitialized ? 1u : 0u; + if (csr.heaplessStateInitialized) { + expectedResursiveLockCount++; + } + EXPECT_EQ(expectedResursiveLockCount, csr.recursiveLockCounter.load()); auto bufferGpuVa = ptrOffset(buffer->getGraphicsAllocation(pDevice->getRootDeviceIndex())->getGpuAddress(), buffer->getOffset()); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::hostPtrToBuffer, diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp index 996460fa65..44f8be8b3f 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp @@ -763,7 +763,8 @@ HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocations auto hostAllocationPtr2 = allocateAlignedMemory(hostAllocationSize, MemoryConstants::pageSize); void *hostPtr2 = reinterpret_cast(hostAllocationPtr2.get()); - EXPECT_EQ(0u, csr.makeSurfacePackNonResidentCalled); + auto expectedCalled = csr.heaplessStateInitialized ? 1u : 0u; + EXPECT_EQ(expectedCalled, csr.makeSurfacePackNonResidentCalled); auto graphicsAllocation1 = buffer1->getGraphicsAllocation(pDevice->getRootDeviceIndex()); auto graphicsAllocation2 = buffer2->getGraphicsAllocation(pDevice->getRootDeviceIndex()); @@ -783,12 +784,13 @@ HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocations csr.flushBcsTask(blitPropertiesContainer, false, false, *pDevice); + expectedCalled++; uint32_t residentAllocationsNum = 5u; EXPECT_TRUE(csr.isMadeResident(graphicsAllocation1)); EXPECT_TRUE(csr.isMadeResident(graphicsAllocation2)); EXPECT_TRUE(csr.isMadeResident(csr.getTagAllocation())); EXPECT_TRUE(csr.isMadeResident(csr.getTagAllocation())); - EXPECT_EQ(1u, csr.makeSurfacePackNonResidentCalled); + EXPECT_EQ(expectedCalled, csr.makeSurfacePackNonResidentCalled); auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironmentRef(); if (getHelper().isDummyBlitWaRequired()) { residentAllocationsNum++; @@ -883,7 +885,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitCalledThenFlushCommandBuffer) { auto &commandStream = csr.getCS(MemoryConstants::pageSize); size_t commandStreamOffset = 4; commandStream.getSpace(commandStreamOffset); - + commandStreamOffset = commandStream.getUsed(); uint32_t newTaskCount = 17; csr.taskCount = newTaskCount - 1; @@ -922,6 +924,7 @@ HWTEST_F(BcsTests, givenTaskStreamWhenFlushingThenStoreTaskStartAddress) { auto &commandStream = csr.getCS(MemoryConstants::pageSize); size_t commandStreamOffset = 4; commandStream.getSpace(commandStreamOffset); + commandStreamOffset = commandStream.getUsed(); auto blitProperties = BlitProperties::constructPropertiesForReadWrite(BlitterConstants::BlitDirection::hostPtrToBuffer, csr, graphicsAllocation, nullptr, hostPtr, diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xe2_and_later.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xe2_and_later.cpp index 80a94d03b9..d7e592211d 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xe2_and_later.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xe2_and_later.cpp @@ -20,6 +20,9 @@ HWTEST2_F(CommandStreamReceiverXe2AndLater, GivenPreambleNotSentAndDebugFlagEnab debugManager.flags.PipelinedPipelineSelect.set(true); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } commandStreamReceiver.isPreambleSent = false; flushTask(commandStreamReceiver); diff --git a/opencl/test/unit_test/device/device_tests.cpp b/opencl/test/unit_test/device/device_tests.cpp index 7001afdec1..a3ddd4dac6 100644 --- a/opencl/test/unit_test/device/device_tests.cpp +++ b/opencl/test/unit_test/device/device_tests.cpp @@ -296,6 +296,7 @@ TEST(DeviceCreation, givenMultiRootDeviceWhenTheyAreCreatedThenEachOsContextHasU executionEnvironment->rootDeviceEnvironments[i]->initGmm(); executionEnvironment->rootDeviceEnvironments[i]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; } + executionEnvironment->calculateMaxOsContextCount(); auto device1 = std::unique_ptr(Device::create(executionEnvironment, 0u)); auto device2 = std::unique_ptr(Device::create(executionEnvironment, 1u)); @@ -355,8 +356,12 @@ TEST(DeviceCreation, givenMultiRootDeviceWhenTheyAreCreatedThenEachDeviceHasSepe executionEnvironment->prepareRootDeviceEnvironments(numDevices); for (auto i = 0u; i < executionEnvironment->rootDeviceEnvironments.size(); i++) { executionEnvironment->rootDeviceEnvironments[i]->setHwInfoAndInitHelpers(defaultHwInfo.get()); + executionEnvironment->rootDeviceEnvironments[i]->initGmm(); } + + executionEnvironment->calculateMaxOsContextCount(); + auto device = std::unique_ptr(Device::create(executionEnvironment, 0u)); auto device2 = std::unique_ptr(Device::create(executionEnvironment, 1u)); @@ -373,6 +378,7 @@ TEST(DeviceCreation, givenMultiRootDeviceWhenTheyAreCreatedThenEachDeviceHasSepe executionEnvironment->rootDeviceEnvironments[i]->initGmm(); executionEnvironment->rootDeviceEnvironments[i]->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true; } + executionEnvironment->calculateMaxOsContextCount(); auto &gfxCoreHelper = executionEnvironment->rootDeviceEnvironments[0]->getHelper(); const auto &numGpgpuEngines = gfxCoreHelper.getGpgpuEngineInstances(*executionEnvironment->rootDeviceEnvironments[0]).size(); diff --git a/opencl/test/unit_test/device/sub_device_tests.cpp b/opencl/test/unit_test/device/sub_device_tests.cpp index 538e7192c6..8b3777ef88 100644 --- a/opencl/test/unit_test/device/sub_device_tests.cpp +++ b/opencl/test/unit_test/device/sub_device_tests.cpp @@ -73,6 +73,8 @@ TEST(SubDevicesTest, givenDeviceWithSubDevicesWhenSubDeviceApiRefCountsAreChange std::unordered_map mockableEnvs = {{"ZE_FLAT_DEVICE_HIERARCHY", "COMPOSITE"}}; VariableBackup *> mockableEnvValuesBackup(&IoFunctions::mockableEnvValues, &mockableEnvs); debugManager.flags.CreateMultipleSubDevices.set(2); + UnitTestSetter::disableHeaplessStateInit(restorer); + VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); initPlatform(); auto nonDefaultPlatform = std::make_unique(*platform()->peekExecutionEnvironment()); @@ -108,7 +110,9 @@ TEST(SubDevicesTest, givenDeviceWithSubDevicesWhenSubDeviceApiRefCountsAreChange TEST(SubDevicesTest, givenDeviceWithSubDevicesAndSubDevicesAsDevicesIsSetWhenSubDeviceApiRefCountsAreChangedThenChangeIsNotPropagatedToRootDevice) { DebugManagerStateRestore restorer; debugManager.flags.CreateMultipleSubDevices.set(2); + UnitTestSetter::disableHeaplessStateInit(restorer); VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); + initPlatform(); platform()->peekExecutionEnvironment()->setExposeSubDevicesAsDevices(1); auto nonDefaultPlatform = std::make_unique(*platform()->peekExecutionEnvironment()); @@ -144,6 +148,7 @@ TEST(SubDevicesTest, givenDeviceWithSubDevicesAndSubDevicesAsDevicesIsSetWhenSub TEST(SubDevicesTest, givenDeviceWithSubDevicesWhenSubDeviceInternalRefCountsAreChangedThenChangeIsPropagatedToRootDevice) { DebugManagerStateRestore restorer; debugManager.flags.CreateMultipleSubDevices.set(2); + VariableBackup mockDeviceFlagBackup(&MockDevice::createSingleDevice, false); auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); device->incRefInternal(); diff --git a/opencl/test/unit_test/event/async_events_handler_tests.cpp b/opencl/test/unit_test/event/async_events_handler_tests.cpp index d802a6cd74..5924ed357c 100644 --- a/opencl/test/unit_test/event/async_events_handler_tests.cpp +++ b/opencl/test/unit_test/event/async_events_handler_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -62,7 +62,7 @@ class AsyncEventsHandlerTests : public ::testing::Test { commandQueue = makeReleaseable(context.get(), context->getDevice(0), nullptr, false); - *(commandQueue->getGpgpuCommandStreamReceiver().getTagAddress()) = 0; + *(commandQueue->getGpgpuCommandStreamReceiver().getTagAddress()) = commandQueue->getHeaplessStateInitEnabled() ? 1 : 0; event1 = makeReleaseable(context.get(), commandQueue.get(), CL_COMMAND_BARRIER, CompletionStamp::notReady, CompletionStamp::notReady); event2 = makeReleaseable(context.get(), commandQueue.get(), CL_COMMAND_BARRIER, CompletionStamp::notReady, CompletionStamp::notReady); @@ -81,6 +81,7 @@ class AsyncEventsHandlerTests : public ::testing::Test { }; TEST_F(AsyncEventsHandlerTests, givenEventsWhenListIsProcessedThenUpdateExecutionStatus) { + event1->setTaskStamp(0, 0); event2->setTaskStamp(0, 0); @@ -314,9 +315,9 @@ TEST_F(AsyncEventsHandlerTests, givenUserEventWhenCallbackIsAddedThenDontRegiste TEST_F(AsyncEventsHandlerTests, givenRegistredEventsWhenProcessIsCalledThenReturnCandidateWithLowestTaskCount) { int event1Counter(0), event2Counter(0), event3Counter(0); - event1->setTaskStamp(0, 1); - event2->setTaskStamp(0, 2); - event3->setTaskStamp(0, 3); + event1->setTaskStamp(0, commandQueue->getHeaplessStateInitEnabled() ? 2 : 1); + event2->setTaskStamp(0, commandQueue->getHeaplessStateInitEnabled() ? 3 : 2); + event3->setTaskStamp(0, commandQueue->getHeaplessStateInitEnabled() ? 4 : 3); event2->addCallback(&this->callbackFcn, CL_COMPLETE, &event2Counter); handler->registerEvent(event2.get()); @@ -334,8 +335,8 @@ TEST_F(AsyncEventsHandlerTests, givenRegistredEventsWhenProcessIsCalledThenRetur } TEST_F(AsyncEventsHandlerTests, givenEventWithoutCallbacksWhenProcessedThenDontReturnAsSleepCandidate) { - event1->setTaskStamp(0, 1); - event2->setTaskStamp(0, 2); + event1->setTaskStamp(0, commandQueue->getHeaplessStateInitEnabled() ? 2 : 1); + event2->setTaskStamp(0, commandQueue->getHeaplessStateInitEnabled() ? 3 : 2); handler->registerEvent(event1.get()); event2->addCallback(&this->callbackFcn, CL_COMPLETE, &counter); @@ -348,7 +349,7 @@ TEST_F(AsyncEventsHandlerTests, givenEventWithoutCallbacksWhenProcessedThenDontR } TEST_F(AsyncEventsHandlerTests, givenNoGpuHangAndSleepCandidateWhenProcessedThenCallWaitWithQuickKmdSleepRequest) { - event1->setTaskStamp(0, 1); + event1->setTaskStamp(0, commandQueue->getHeaplessStateInitEnabled() ? 2 : 1); event1->addCallback(&this->callbackFcn, CL_COMPLETE, &counter); event1->handler->registerEvent(event1.get()); event1->handler->allowAsyncProcess.store(true); @@ -362,7 +363,7 @@ TEST_F(AsyncEventsHandlerTests, givenNoGpuHangAndSleepCandidateWhenProcessedThen } TEST_F(AsyncEventsHandlerTests, givenSleepCandidateAndGpuHangWhenProcessedThenCallWaitAndSetExecutionStatusToAbortedDueToGpuHang) { - event1->setTaskStamp(0, 1); + event1->setTaskStamp(0, commandQueue->getHeaplessStateInitEnabled() ? 2 : 1); event1->addCallback(&this->callbackFcn, CL_COMPLETE, &counter); event1->handler->registerEvent(event1.get()); event1->handler->allowAsyncProcess.store(true); diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp index f0f97c266e..ea783a46e9 100644 --- a/opencl/test/unit_test/event/event_tests.cpp +++ b/opencl/test/unit_test/event/event_tests.cpp @@ -587,7 +587,7 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut event.submitCommand(false); - EXPECT_EQ(1u, mockCmdQueue.latestTaskCountWaited); + EXPECT_EQ(mockCmdQueue.getHeaplessStateInitEnabled() ? 2u : 1u, mockCmdQueue.latestTaskCountWaited); std::string output = testing::internal::GetCapturedStdout(); EXPECT_STREQ("test", output.c_str()); diff --git a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h index 937b1e5979..a046364845 100644 --- a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h +++ b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h @@ -12,6 +12,7 @@ #include "shared/source/command_stream/preemption.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/cache_policy.h" +#include "shared/source/helpers/compiler_product_helper.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/preamble.h" #include "shared/source/memory_manager/graphics_allocation.h" @@ -48,6 +49,11 @@ struct UltCommandStreamReceiverTest flushTaskFlags.threadArbitrationPolicy = gfxCoreHelper.getDefaultThreadArbitrationPolicy(); pDevice->getGpgpuCommandStreamReceiver().setupContext(*pDevice->getDefaultEngine().osContext); + + auto &compilerProductHelper = pDevice->getCompilerProductHelper(); + + auto heaplessEnabled = compilerProductHelper.isHeaplessModeEnabled(); + this->heaplessStateEnabled = compilerProductHelper.isHeaplessStateInitEnabled(heaplessEnabled); } void initHeaps() { @@ -88,6 +94,19 @@ struct UltCommandStreamReceiverTest ClDeviceFixture::tearDown(); } + template + CompletionStamp flushTaskMethod(CommandStreamReceiverType &commandStreamReceiver, LinearStream &commandStream, size_t commandStreamStart, + const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) { + + if (reinterpret_cast *>(&commandStreamReceiver)->heaplessStateInitialized) { + return commandStreamReceiver.flushTaskStateless(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); + + } else { + return commandStreamReceiver.flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); + } + } + template CompletionStamp flushTask(CommandStreamReceiverType &commandStreamReceiver, bool block = false, @@ -95,19 +114,35 @@ struct UltCommandStreamReceiverTest bool requiresCoherency = false, bool lowPriority = false) { - flushTaskFlags.blocking = block; - flushTaskFlags.lowPriority = lowPriority; - flushTaskFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); + if (commandStreamReceiver.heaplessStateInitialized) { + flushTaskFlags.blocking = block; + flushTaskFlags.lowPriority = lowPriority; + flushTaskFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); - return commandStreamReceiver.flushTask( - commandStream, - startOffset, - &dsh, - &ioh, - &ssh, - taskLevel, - flushTaskFlags, - *pDevice); + return commandStreamReceiver.flushTaskStateless( + commandStream, + startOffset, + &dsh, + &ioh, + &ssh, + taskLevel, + flushTaskFlags, + *pDevice); + } else { + flushTaskFlags.blocking = block; + flushTaskFlags.lowPriority = lowPriority; + flushTaskFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo()); + + return commandStreamReceiver.flushTask( + commandStream, + startOffset, + &dsh, + &ioh, + &ssh, + taskLevel, + flushTaskFlags, + *pDevice); + } } template @@ -181,5 +216,6 @@ struct UltCommandStreamReceiverTest const size_t sizeStream = 512; const size_t alignmentStream = 0x1000; + bool heaplessStateEnabled = false; }; } // namespace NEO diff --git a/opencl/test/unit_test/helpers/task_information_tests.cpp b/opencl/test/unit_test/helpers/task_information_tests.cpp index 2f5c443d66..cdd77f7ad1 100644 --- a/opencl/test/unit_test/helpers/task_information_tests.cpp +++ b/opencl/test/unit_test/helpers/task_information_tests.cpp @@ -27,10 +27,9 @@ using namespace NEO; TEST(CommandTest, GivenNoTerminateFlagWhenSubmittingMapUnmapThenCsrIsFlushed) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr cmdQ(new MockCommandQueue(nullptr, device.get(), nullptr, false)); - MockCommandStreamReceiver csr(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); - MockBuffer buffer; - auto initialTaskCount = csr.peekTaskCount(); + MockBuffer buffer; + auto initialTaskCount = 0u; MemObjSizeArray size = {{1, 1, 1}}; MemObjOffsetArray offset = {{0, 0, 0}}; @@ -38,7 +37,7 @@ TEST(CommandTest, GivenNoTerminateFlagWhenSubmittingMapUnmapThenCsrIsFlushed) { CompletionStamp completionStamp = command->submit(20, false); auto expectedTaskCount = initialTaskCount + 1; - if (csr.heaplessStateInitialized) { + if (cmdQ->heaplessStateInitEnabled) { expectedTaskCount++; } EXPECT_EQ(expectedTaskCount, completionStamp.taskCount); @@ -67,22 +66,15 @@ TEST(CommandTest, GivenTerminateFlagWhenSubmittingMapUnmapThenFlushIsAborted) { TEST(CommandTest, GivenNoTerminateFlagWhenSubmittingMarkerThenCsrIsNotFlushed) { auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); std::unique_ptr cmdQ(new MockCommandQueue(nullptr, device.get(), nullptr, false)); - MockCommandStreamReceiver csr(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()); MockBuffer buffer; - auto initialTaskCount = csr.peekTaskCount(); + auto heaplessStateInit = cmdQ->getHeaplessStateInitEnabled(); + auto initialTaskCount = heaplessStateInit ? 1u : 0u; + std::unique_ptr command(new CommandWithoutKernel(*cmdQ)); CompletionStamp completionStamp = command->submit(20, false); - auto heaplessStateInit = cmdQ->getHeaplessStateInitEnabled(); - if (heaplessStateInit) { - EXPECT_EQ(1u, initialTaskCount); - } else { - EXPECT_EQ(0u, initialTaskCount); - } - - EXPECT_EQ(0u, completionStamp.taskCount); - EXPECT_EQ(initialTaskCount, csr.peekTaskCount()); + EXPECT_EQ(initialTaskCount, completionStamp.taskCount); } TEST(CommandTest, GivenTerminateFlagWhenSubmittingMarkerThenFlushIsAborted) { diff --git a/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp b/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp index 960cb93205..28b37d8126 100644 --- a/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp +++ b/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp @@ -355,6 +355,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpCommandStreamReceiverFlushTaskTests, givenDebug using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (commandStreamReceiver.heaplessStateInitialized) { + GTEST_SKIP(); + } + debugManager.flags.ForceMultiGpuAtomics.set(0); debugManager.flags.ForceMultiGpuPartialWrites.set(0); flushTask(commandStreamReceiver); diff --git a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp index 02c5657684..6604914eb4 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp @@ -920,17 +920,17 @@ HWTEST_F(TimestampPacketTests, givenEventWhenReleasingThenCheckQueueResources) { cmdQ->flush(); auto tagAddress = csr.getTagAddress(); - *tagAddress = 1; + *tagAddress = csr.heaplessStateInitialized ? 2 : 1; - EXPECT_EQ(2u, csr.taskCount); - EXPECT_EQ(2u, cmdQ->taskCount); + EXPECT_EQ(csr.heaplessStateInitialized ? 3u : 2u, csr.taskCount); + EXPECT_EQ(csr.heaplessStateInitialized ? 3u : 2u, cmdQ->taskCount); clWaitForEvents(1, &clEvent); EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size()); - *tagAddress = 2; + *tagAddress = csr.heaplessStateInitialized ? 3 : 2; clReleaseEvent(clEvent); @@ -964,10 +964,10 @@ HWTEST_F(TimestampPacketTests, givenAllEnginesReadyWhenWaitingForEventThenClearD cmdQ->flush(); - EXPECT_EQ(2u, csr.taskCount); + EXPECT_EQ(cmdQ->heaplessStateInitEnabled ? 3u : 2u, csr.taskCount); auto tagAddress = csr.getTagAddress(); - *tagAddress = 1; + *tagAddress = cmdQ->heaplessStateInitEnabled ? 2 : 1; auto eventObj1 = castToObjectOrAbort(event1); auto eventObj2 = castToObjectOrAbort(event2); @@ -985,7 +985,7 @@ HWTEST_F(TimestampPacketTests, givenAllEnginesReadyWhenWaitingForEventThenClearD EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size()); - *tagAddress = 2; + *tagAddress = cmdQ->heaplessStateInitEnabled ? 3 : 2; eventObj1->wait(false, false); EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); @@ -1031,11 +1031,11 @@ HWTEST_F(TimestampPacketTests, givenNewSubmissionWhileWaitingThenDontReleaseDefe cmdQ->flush(); - EXPECT_EQ(2u, csr.taskCount); - EXPECT_EQ(2u, cmdQ->taskCount); + EXPECT_EQ(cmdQ->getHeaplessStateInitEnabled() ? 3u : 2u, csr.taskCount); + EXPECT_EQ(cmdQ->getHeaplessStateInitEnabled() ? 3u : 2u, cmdQ->taskCount); auto tagAddress = csr.getTagAddress(); - *tagAddress = 2; + *tagAddress = cmdQ->getHeaplessStateInitEnabled() ? 3 : 2; cmdQ->finish(); @@ -1083,20 +1083,20 @@ HWTEST_F(TimestampPacketTests, givenNewBcsSubmissionWhileWaitingThenDontReleaseD cmdQ->flush(); - EXPECT_EQ(2u, csr.taskCount); - cmdQ->bcsStates[0].taskCount = 2; + EXPECT_EQ(cmdQ->getHeaplessStateInitEnabled() ? 3u : 2u, csr.taskCount); + cmdQ->bcsStates[0].taskCount = cmdQ->getHeaplessStateInitEnabled() ? 3 : 2; auto tagAddress = csr.getTagAddress(); - *tagAddress = 2; + *tagAddress = cmdQ->getHeaplessStateInitEnabled() ? 3 : 2; cmdQ->finish(); - EXPECT_EQ(3u, cmdQ->bcsStates[0].taskCount); + EXPECT_EQ(cmdQ->getHeaplessStateInitEnabled() ? 4u : 3u, cmdQ->bcsStates[0].taskCount); EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size()); EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size()); - *tagAddress = 3; + *tagAddress = cmdQ->getHeaplessStateInitEnabled() ? 4 : 3; cmdQ->bcsEngines[0] = nullptr; cmdQ->bcsStates[0].engineType = aub_stream::EngineType::NUM_ENGINES; @@ -1140,10 +1140,10 @@ HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitForQueuesWhenFinishThenCa VariableBackup> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress); auto &csr = cmdQ->getGpgpuCommandStreamReceiver(); - *csr.getTagAddress() = 0; + *csr.getTagAddress() = cmdQ->getHeaplessStateInitEnabled() ? 1 : 0; CpuIntrinsicsTests::pauseAddress = csr.getTagAddress(); - CpuIntrinsicsTests::pauseValue = 3u; + CpuIntrinsicsTests::pauseValue = cmdQ->getHeaplessStateInitEnabled() ? 4u : 3u; CpuIntrinsicsTests::setupPauseAddress = [&]() { CpuIntrinsicsTests::pauseAddress = csr.getTagAddress(); }; diff --git a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp index 6af4562a4e..b35e81115b 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp @@ -69,6 +69,27 @@ class MockCommandStreamReceiverHW : public UltCommandStreamReceiver dispatchFlags, device); } + + CompletionStamp flushTaskStateless( + LinearStream &commandStream, + size_t commandStreamStart, + const IndirectHeap *dsh, + const IndirectHeap *ioh, + const IndirectHeap *ssh, + TaskCountType taskLevel, + DispatchFlags &dispatchFlags, + Device &device) override { + stream = &commandStream; + return UltCommandStreamReceiver::flushTaskStateless( + commandStream, + commandStreamStart, + dsh, + ioh, + ssh, + taskLevel, + dispatchFlags, + device); + } LinearStream *stream = nullptr; }; @@ -419,7 +440,12 @@ HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenFlushingThenProgramPip auto pipeControl = genCmdCast(*hwParser.pipeControlList.begin()); ASSERT_NE(nullptr, pipeControl); - EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE, pipeControl->getPostSyncOperation()); + + if (cmdQ.heaplessStateInitEnabled) { + EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation()); + } else { + EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE, pipeControl->getPostSyncOperation()); + } EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index ee7c6ccc5f..22c6dca93f 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -804,6 +804,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnq resetCopyEngineSelector(); auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); + auto bcsCsr = static_cast *>(this->bcsCsr); auto queueCsr = static_cast *>(&cmdQ->getGpgpuCommandStreamReceiver()); @@ -814,10 +815,11 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnq buffer->forceDisallowCPUCopy = true; void *hostPtr = reinterpret_cast(0x12340000); + auto offset = queueCsr->commandStream.getUsed(); cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr); HardwareParse hwParser; - hwParser.parseCommands(queueCsr->commandStream); + hwParser.parseCommands(queueCsr->commandStream, offset); uint64_t pipeControlWriteAddress = 0; for (auto &cmd : hwParser.cmdList) { @@ -843,6 +845,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnq EXPECT_EQ(pipeControlWriteAddress, genCmdCast(*(semaphores[1]))->getSemaphoreGraphicsAddress()); } else { EXPECT_EQ(UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment()) ? 3u : 1u, semaphores.size()); + EXPECT_EQ(pipeControlWriteAddress, genCmdCast(*(semaphores[0]))->getSemaphoreGraphicsAddress()); } } @@ -890,12 +893,14 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenStallingCommandsOnNextFlushWhenReleasing }; auto &csrStream = cmdQ->getGpgpuCommandStreamReceiver().getCS(0); + auto csrOffset = csrStream.getUsed(); + EXPECT_TRUE(cmdQ->isStallingCommandsOnNextFlushRequired()); userEvent0.setStatus(CL_COMPLETE); EXPECT_FALSE(cmdQ->isStallingCommandsOnNextFlushRequired()); - EXPECT_TRUE(pipeControlLookup(csrStream, 0, device->getRootDeviceEnvironment())); + EXPECT_TRUE(pipeControlLookup(csrStream, csrOffset, device->getRootDeviceEnvironment())); - auto csrOffset = csrStream.getUsed(); + csrOffset = csrStream.getUsed(); userEvent1.setStatus(CL_COMPLETE); EXPECT_FALSE(pipeControlLookup(csrStream, csrOffset, device->getRootDeviceEnvironment())); cmdQ->isQueueBlocked(); @@ -977,6 +982,8 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenOutputTimestampPacketWhenBlitCalledThenp HardwareParse hwParser; hwParser.parseCommands(csr->commandStream); + auto heaplessStateInit = cmdQ->getHeaplessStateInitEnabled(); + uint32_t miFlushDwCmdsWithOutputCount = 0; bool blitCmdFound = false; for (auto &cmd : hwParser.cmdList) { @@ -985,19 +992,21 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenOutputTimestampPacketWhenBlitCalledThenp continue; } - EXPECT_EQ(miFlushDwCmdsWithOutputCount == 0, + bool correctMiFlushDwCmdsWithOutputCount = heaplessStateInit ? miFlushDwCmdsWithOutputCount == 1 : miFlushDwCmdsWithOutputCount == 0; + + EXPECT_EQ(correctMiFlushDwCmdsWithOutputCount, timestampPacketGpuWriteAddress == miFlushDwCmd->getDestinationAddress()); - EXPECT_EQ(miFlushDwCmdsWithOutputCount == 0, + EXPECT_EQ(correctMiFlushDwCmdsWithOutputCount, 0u == miFlushDwCmd->getImmediateData()); miFlushDwCmdsWithOutputCount++; } else if (genCmdCast(cmd)) { blitCmdFound = true; - EXPECT_EQ(0u, miFlushDwCmdsWithOutputCount); + EXPECT_EQ(heaplessStateInit ? 1u : 0u, miFlushDwCmdsWithOutputCount); } } - EXPECT_EQ(2u, miFlushDwCmdsWithOutputCount); // TimestampPacket + taskCount + EXPECT_EQ(heaplessStateInit ? 3u : 2u, miFlushDwCmdsWithOutputCount); // TimestampPacket + taskCount EXPECT_TRUE(blitCmdFound); } diff --git a/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp b/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp index 36eba51c77..9f086ae7c5 100644 --- a/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp +++ b/opencl/test/unit_test/memory_manager/unified_memory_manager_tests.cpp @@ -939,16 +939,18 @@ TEST(UnifiedSharedMemoryTransferCalls, givenHostUsmAllocationWhenPointerIsUsedFo ASSERT_EQ(CL_SUCCESS, status); auto neoQueue = castToObject(commandQueue); + auto heaplessStateInit = neoQueue->getHeaplessStateInitEnabled(); + auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); - EXPECT_EQ(1u, gpuAllocation->getTaskCount(osContextId)); + EXPECT_EQ(heaplessStateInit ? 2u : 1u, gpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, false, 0u, 4096u, hostMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); - EXPECT_EQ(2u, gpuAllocation->getTaskCount(osContextId)); + EXPECT_EQ(heaplessStateInit ? 3u : 2u, gpuAllocation->getTaskCount(osContextId)); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); @@ -979,16 +981,18 @@ TEST(UnifiedSharedMemoryTransferCalls, givenDeviceUsmAllocationWhenPtrIsUsedForT ASSERT_EQ(CL_SUCCESS, status); auto neoQueue = castToObject(commandQueue); + auto heaplessStateInit = neoQueue->getHeaplessStateInitEnabled(); + auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); - EXPECT_EQ(1u, gpuAllocation->getTaskCount(osContextId)); + EXPECT_EQ(heaplessStateInit ? 2u : 1u, gpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, false, 0u, 4096u, deviceMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); - EXPECT_EQ(2u, gpuAllocation->getTaskCount(osContextId)); + EXPECT_EQ(heaplessStateInit ? 3u : 2u, gpuAllocation->getTaskCount(osContextId)); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); @@ -1019,16 +1023,18 @@ TEST(UnifiedSharedMemoryTransferCalls, givenDeviceUsmAllocationWhenPtrIsUsedForT ASSERT_EQ(CL_SUCCESS, status); auto neoQueue = castToObject(commandQueue); + auto heaplessStateInit = neoQueue->getHeaplessStateInitEnabled(); + auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); - EXPECT_EQ(1u, gpuAllocation->getTaskCount(osContextId)); + EXPECT_EQ(heaplessStateInit ? 2u : 1u, gpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, true, 0u, 4096u, deviceMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); - EXPECT_EQ(2u, gpuAllocation->getTaskCount(osContextId)); + EXPECT_EQ(heaplessStateInit ? 3u : 2u, gpuAllocation->getTaskCount(osContextId)); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); @@ -1142,16 +1148,18 @@ TEST(UnifiedSharedMemoryTransferCalls, givenSharedUsmAllocationWithoutLocalMemor ASSERT_EQ(CL_SUCCESS, status); auto neoQueue = castToObject(commandQueue); + auto heaplessStateInit = neoQueue->getHeaplessStateInitEnabled(); + auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); - EXPECT_EQ(1u, gpuAllocation->getTaskCount(osContextId)); + EXPECT_EQ(heaplessStateInit ? 2u : 1u, gpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, false, 0u, 4096u, sharedMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); - EXPECT_EQ(2u, gpuAllocation->getTaskCount(osContextId)); + EXPECT_EQ(heaplessStateInit ? 3u : 2u, gpuAllocation->getTaskCount(osContextId)); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); @@ -1182,6 +1190,7 @@ TEST(UnifiedSharedMemoryTransferCalls, givenSharedUsmAllocationWithLocalMemoryWh auto neoQueue = castToObject(commandQueue); auto osContextId = neoQueue->getGpgpuCommandStreamReceiver().getOsContext().getContextId(); + auto heaplessStateInit = neoQueue->getHeaplessStateInitEnabled(); EXPECT_EQ(GraphicsAllocation::objectNotUsed, svmAllocation->cpuAllocation->getTaskCount(osContextId)); @@ -1191,12 +1200,12 @@ TEST(UnifiedSharedMemoryTransferCalls, givenSharedUsmAllocationWithLocalMemoryWh auto &temporaryAllocations = neoQueue->getGpgpuCommandStreamReceiver().getTemporaryAllocations(); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); - EXPECT_EQ(1u, svmAllocation->cpuAllocation->getTaskCount(osContextId)); + EXPECT_EQ(heaplessStateInit ? 2u : 1u, svmAllocation->cpuAllocation->getTaskCount(osContextId)); status = clEnqueueReadBuffer(commandQueue, buffer, false, 0u, 4096u, sharedMemory, 0u, nullptr, nullptr); ASSERT_EQ(CL_SUCCESS, status); EXPECT_TRUE(temporaryAllocations.peekIsEmpty()); - EXPECT_EQ(2u, svmAllocation->cpuAllocation->getTaskCount(osContextId)); + EXPECT_EQ(heaplessStateInit ? 3u : 2u, svmAllocation->cpuAllocation->getTaskCount(osContextId)); status = clReleaseMemObject(buffer); ASSERT_EQ(CL_SUCCESS, status); diff --git a/opencl/test/unit_test/mocks/mock_platform.cpp b/opencl/test/unit_test/mocks/mock_platform.cpp index 3c99127556..89e7c50e2e 100644 --- a/opencl/test/unit_test/mocks/mock_platform.cpp +++ b/opencl/test/unit_test/mocks/mock_platform.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -8,7 +8,11 @@ #include "opencl/test/unit_test/mocks/mock_platform.h" #include "shared/source/device/device.h" +#include "shared/source/execution_environment/root_device_environment.h" +#include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/device_factory.h" +#include "shared/test/common/helpers/default_hw_info.h" +#include "shared/test/common/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_cl_execution_environment.h" @@ -20,6 +24,17 @@ bool initPlatform() { } bool MockPlatform::initializeWithNewDevices() { executionEnvironment.prepareRootDeviceEnvironments(1u); + + for (auto i = 0u; i < executionEnvironment.rootDeviceEnvironments.size(); i++) { + + executionEnvironment.rootDeviceEnvironments[i]->setHwInfoAndInitHelpers(NEO::defaultHwInfo.get()); + + UnitTestSetter::setRcsExposure(*executionEnvironment.rootDeviceEnvironments[i]); + UnitTestSetter::setCcsExposure(*executionEnvironment.rootDeviceEnvironments[i]); + } + + executionEnvironment.calculateMaxOsContextCount(); + return Platform::initialize(DeviceFactory::createDevices(executionEnvironment)); } diff --git a/opencl/test/unit_test/profiling/profiling_tests.cpp b/opencl/test/unit_test/profiling/profiling_tests.cpp index 8b9ace1a28..371de38fd6 100644 --- a/opencl/test/unit_test/profiling/profiling_tests.cpp +++ b/opencl/test/unit_test/profiling/profiling_tests.cpp @@ -1143,7 +1143,12 @@ HWTEST_F(ProfilingWithPerfCountersTests, GivenCommandQueueWithProfilingPerfCount ASSERT_NE(nullptr, pAfterPC); EXPECT_EQ(1u, pAfterPC->getCommandStreamerStallEnable()); - EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_NO_WRITE, pBeforePC->getPostSyncOperation()); + if (pCmdQ->getHeaplessStateInitEnabled()) { + EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pBeforePC->getPostSyncOperation()); + + } else { + EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_NO_WRITE, pBeforePC->getPostSyncOperation()); + } // expect MI_REPORT_PERF_COUNT after WALKER auto itorAfterReportPerf = find(itorGPGPUWalkerCmd, cmdList.end()); diff --git a/shared/test/common/mocks/mock_aub_csr.h b/shared/test/common/mocks/mock_aub_csr.h index 9e351b2af1..0ffcbbb3f1 100644 --- a/shared/test/common/mocks/mock_aub_csr.h +++ b/shared/test/common/mocks/mock_aub_csr.h @@ -70,6 +70,14 @@ struct MockAubCsr : public AUBCommandStreamReceiverHw { return AUBCommandStreamReceiverHw::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); } + CompletionStamp flushTaskStateless(LinearStream &commandStream, size_t commandStreamStart, + const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, + TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) override { + recordedDispatchFlags = dispatchFlags; + + return AUBCommandStreamReceiverHw::flushTaskStateless(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); + } + DispatchMode peekDispatchMode() const { return this->dispatchMode; } diff --git a/shared/test/common/mocks/mock_sip.cpp b/shared/test/common/mocks/mock_sip.cpp index dfac917da8..a7f55ad763 100644 --- a/shared/test/common/mocks/mock_sip.cpp +++ b/shared/test/common/mocks/mock_sip.cpp @@ -48,7 +48,7 @@ void MockSipKernel::createMockSipAllocation() { 0u, MemoryConstants::pageSize, MemoryPool::system4KBPages, - 3u); + 14u); } } // namespace NEO