/*
 * Copyright (C) 2018 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "runtime/memory_manager/internal_allocation_storage.h"
#include "runtime/memory_manager/memory_constants.h"
#include "runtime/memory_manager/memory_manager.h"
#include "unit_tests/fixtures/ult_command_stream_receiver_fixture.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/mocks/mock_experimental_command_buffer.h"
#include "test.h"

#include "gtest/gtest.h"

#include <cstdint>
#include <memory>
#include <string>

using namespace OCLRT;

// Fixture that sets the EnableExperimentalCommandBuffer debug flag to 1 before
// the base fixture is set up. A DebugManagerStateRestore object is created
// first and is owned by the fixture for its whole lifetime.
struct ExperimentalCommandBufferTest : public UltCommandStreamReceiverTest {
    void SetUp() override {
        // The restore object must exist before the flag is modified.
        dbgRestore.reset(new DebugManagerStateRestore());
        DebugManager.flags.EnableExperimentalCommandBuffer.set(1);
        UltCommandStreamReceiverTest::SetUp();
    }

    std::unique_ptr<DebugManagerStateRestore> dbgRestore;
};

// Fixture that, after the base fixture is set up, installs a
// MockExperimentalCommandBuffer into the device's command stream receiver so
// that tests can inspect the buffer's members directly.
struct MockExperimentalCommandBufferTest : public UltCommandStreamReceiverTest {
    void SetUp() override {
        UltCommandStreamReceiverTest::SetUp();
        pDevice->getCommandStreamReceiver().setExperimentalCmdBuffer(
            std::unique_ptr<MockExperimentalCommandBuffer>(new MockExperimentalCommandBuffer(&pDevice->getCommandStreamReceiver())));
    }
};

// After a single flush: the experimental stream, the experimental allocation
// and the timestamps allocation are resident, the first command parsed from
// the CSR stream is a second-level MI_BATCH_BUFFER_START pointing at the
// experimental stream, and the experimental stream holds the sequence
// PIPE_CONTROL, PIPE_CONTROL(ts), MI_SEMAPHORE_WAIT, PIPE_CONTROL,
// PIPE_CONTROL(ts), MI_BATCH_BUFFER_END.
HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCsrIsFlushedThenExpectProperlyFilledExperimentalCmdBuffer) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.storeMakeResidentAllocations = true;
    MockExperimentalCommandBuffer *mockExCmdBuffer = static_cast<MockExperimentalCommandBuffer *>(commandStreamReceiver.experimentalCmdBuffer.get());

    flushTask(commandStreamReceiver);

    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream.get());
    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream->getGraphicsAllocation());
    uint64_t exCmdBufferGpuAddr = mockExCmdBuffer->currentStream->getGraphicsAllocation()->getGpuAddress();
    EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->currentStream->getGraphicsAllocation()));

    ASSERT_NE(nullptr, mockExCmdBuffer->experimentalAllocation);
    uint64_t exAllocationGpuAddr = mockExCmdBuffer->experimentalAllocation->getGpuAddress();
    EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->experimentalAllocation));

    ASSERT_NE(nullptr, mockExCmdBuffer->timestamps);
    EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->timestamps));

    // One flush is expected to consume one pair of 64-bit timestamp slots.
    constexpr uint32_t expectedTsOffset = 2 * sizeof(uint64_t);
    EXPECT_EQ(expectedTsOffset, mockExCmdBuffer->timestampsOffset);
    constexpr uint32_t expectedExOffset = 0;
    EXPECT_EQ(expectedExOffset, mockExCmdBuffer->experimentalAllocationOffset);

    // The semaphore value is read back from the CPU view of the experimental allocation.
    constexpr uint32_t expectedSemaphoreVal = 1;
    uintptr_t actualSemaphoreAddr = reinterpret_cast<uintptr_t>(mockExCmdBuffer->experimentalAllocation->getUnderlyingBuffer()) +
                                    mockExCmdBuffer->experimentalAllocationOffset;
    uint32_t *actualSemaphoreVal = reinterpret_cast<uint32_t *>(actualSemaphoreAddr);
    EXPECT_EQ(expectedSemaphoreVal, *actualSemaphoreVal);

    HardwareParse hwParserCsr;
    hwParserCsr.parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
    GenCmdList bbList = hwParserCsr.getCommandsList();
    GenCmdList::iterator it = bbList.begin();
    ASSERT_NE(bbList.end(), it);
    // The first parsed CSR command is treated as the MI_BATCH_BUFFER_START.
    MI_BATCH_BUFFER_START *bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(*it);
    ASSERT_NE(nullptr, bbStart);
    EXPECT_EQ(exCmdBufferGpuAddr, bbStart->getBatchBufferStartAddressGraphicsaddress472());
    EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());

    HardwareParse hwParserExCmdBuffer;
    hwParserExCmdBuffer.parseCommands<FamilyType>(*mockExCmdBuffer->currentStream, 0);
    it = hwParserExCmdBuffer.cmdList.begin();
    GenCmdList::iterator end = hwParserExCmdBuffer.cmdList.end();

    //1st PIPE_CONTROL with CS Stall
    ASSERT_NE(end, it);
    PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(*it);
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable());

    //2nd PIPE_CONTROL with ts addr
    uint64_t timeStampAddress = mockExCmdBuffer->timestamps->getGpuAddress();
    uint32_t expectedTsAddress = static_cast<uint32_t>(timeStampAddress & 0x0000FFFFFFFFULL);
    uint32_t expectedTsAddressHigh = static_cast<uint32_t>(timeStampAddress >> 32);
    ++it;
    ASSERT_NE(end, it);
    pipeControl = genCmdCast<PIPE_CONTROL *>(*it);
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable());
    EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pipeControl->getPostSyncOperation());
    EXPECT_EQ(expectedTsAddress, pipeControl->getAddress());
    EXPECT_EQ(expectedTsAddressHigh, pipeControl->getAddressHigh());

    //MI_SEMAPHORE_WAIT
    ++it;
    ASSERT_NE(end, it);
    MI_SEMAPHORE_WAIT *semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*it);
    ASSERT_NE(nullptr, semaphoreCmd);
    EXPECT_EQ(expectedSemaphoreVal, semaphoreCmd->getSemaphoreDataDword());
    EXPECT_EQ(exAllocationGpuAddr, semaphoreCmd->getSemaphoreGraphicsAddress());
    EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation());

    //3rd PIPE_CONTROL with CS stall
    ++it;
    ASSERT_NE(end, it);
    pipeControl = genCmdCast<PIPE_CONTROL *>(*it);
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable());

    //4th PIPE_CONTROL with ts addr
    timeStampAddress = mockExCmdBuffer->timestamps->getGpuAddress() + sizeof(uint64_t);
    expectedTsAddress = static_cast<uint32_t>(timeStampAddress & 0x0000FFFFFFFFULL);
    expectedTsAddressHigh = static_cast<uint32_t>(timeStampAddress >> 32);
    ++it;
    ASSERT_NE(end, it);
    pipeControl = genCmdCast<PIPE_CONTROL *>(*it);
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable());
    EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pipeControl->getPostSyncOperation());
    EXPECT_EQ(expectedTsAddress, pipeControl->getAddress());
    EXPECT_EQ(expectedTsAddressHigh, pipeControl->getAddressHigh());

    //BB_END
    ++it;
    ASSERT_NE(end, it);
    MI_BATCH_BUFFER_END *bbEnd = genCmdCast<MI_BATCH_BUFFER_END *>(*it);
    ASSERT_NE(nullptr, bbEnd);
}

// Without any flush: no linear stream exists yet, the experimental and
// timestamps allocations exist but are not resident, and both offsets are zero.
HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCsrIsNotFlushedThenExperimentalBufferLinearStreamIsNotCreatedAndCmdBufferCommandsHaveProperlyOffsetedAddresses) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.storeMakeResidentAllocations = true;
    MockExperimentalCommandBuffer *mockExCmdBuffer = static_cast<MockExperimentalCommandBuffer *>(commandStreamReceiver.experimentalCmdBuffer.get());

    EXPECT_EQ(nullptr, mockExCmdBuffer->currentStream.get());

    EXPECT_NE(nullptr, mockExCmdBuffer->experimentalAllocation);
    EXPECT_FALSE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->experimentalAllocation));

    EXPECT_NE(nullptr, mockExCmdBuffer->timestamps);
    EXPECT_FALSE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->timestamps));

    constexpr uint32_t expectedTsOffset = 0;
    EXPECT_EQ(expectedTsOffset, mockExCmdBuffer->timestampsOffset);
    constexpr uint32_t expectedExOffset = 0;
    EXPECT_EQ(expectedExOffset, mockExCmdBuffer->experimentalAllocationOffset);
}

// After two flushes: the timestamps offset covers two pairs of slots, the
// second MI_BATCH_BUFFER_START points past the commands written by the first
// flush, and the timestamp PIPE_CONTROLs of the second flush target the third
// and fourth timestamp slots.
HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCsrIsFlushedTwiceThenExpectProperlyFilledExperimentalCmdBufferAndTimestampOffset) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.storeMakeResidentAllocations = true;
    MockExperimentalCommandBuffer *mockExCmdBuffer = static_cast<MockExperimentalCommandBuffer *>(commandStreamReceiver.experimentalCmdBuffer.get());

    flushTask(commandStreamReceiver);
    // Remember where the first flush ended in the CSR stream so that only the
    // second flush is parsed below.
    size_t csrCmdBufferOffset = commandStreamReceiver.commandStream.getUsed();

    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream.get());
    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream->getGraphicsAllocation());
    uint64_t exCmdBufferGpuAddr = mockExCmdBuffer->currentStream->getGraphicsAllocation()->getGpuAddress();
    EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->currentStream->getGraphicsAllocation()));

    ASSERT_NE(nullptr, mockExCmdBuffer->experimentalAllocation);
    EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->experimentalAllocation));

    ASSERT_NE(nullptr, mockExCmdBuffer->timestamps);
    EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->timestamps));

    // Same idea for the experimental stream: bytes used by the first flush.
    size_t cmbBufferOffset = mockExCmdBuffer->currentStream->getUsed();

    flushTask(commandStreamReceiver);

    //two pairs of TS
    constexpr uint32_t expectedTsOffset = 4 * sizeof(uint64_t);
    EXPECT_EQ(expectedTsOffset, mockExCmdBuffer->timestampsOffset);
    constexpr uint32_t expectedExOffset = 0;
    EXPECT_EQ(expectedExOffset, mockExCmdBuffer->experimentalAllocationOffset);

    HardwareParse hwParserCsr;
    hwParserCsr.parseCommands<FamilyType>(commandStreamReceiver.commandStream, csrCmdBufferOffset);
    GenCmdList bbList = hwParserCsr.getCommandsList();
    exCmdBufferGpuAddr += cmbBufferOffset;
    GenCmdList::iterator it = bbList.begin();
    ASSERT_NE(bbList.end(), it);
    MI_BATCH_BUFFER_START *bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(*it);
    ASSERT_NE(nullptr, bbStart);
    EXPECT_EQ(exCmdBufferGpuAddr, bbStart->getBatchBufferStartAddressGraphicsaddress472());
    EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());

    HardwareParse hwParserExCmdBuffer;
    hwParserExCmdBuffer.parseCommands<FamilyType>(*mockExCmdBuffer->currentStream, cmbBufferOffset);
    it = hwParserExCmdBuffer.cmdList.begin();
    GenCmdList::iterator end = hwParserExCmdBuffer.cmdList.end();
    // Guard every step: advancing a past-the-end iterator is undefined
    // behaviour, so fail the test instead if the list is shorter than expected.
    ASSERT_NE(end, it);

    //2nd PIPE_CONTROL
    ++it;
    uint64_t timeStampAddress = mockExCmdBuffer->timestamps->getGpuAddress() + 2 * sizeof(uint64_t);
    uint32_t expectedTsAddress = static_cast<uint32_t>(timeStampAddress & 0x0000FFFFFFFFULL);
    uint32_t expectedTsAddressHigh = static_cast<uint32_t>(timeStampAddress >> 32);
    ASSERT_NE(end, it);
    PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(*it);
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable());
    EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pipeControl->getPostSyncOperation());
    EXPECT_EQ(expectedTsAddress, pipeControl->getAddress());
    EXPECT_EQ(expectedTsAddressHigh, pipeControl->getAddressHigh());

    //omit SEMAPHORE_WAIT and 3rd PIPE_CONTROL
    ++it;
    ASSERT_NE(end, it);
    ++it;
    ASSERT_NE(end, it);

    //get 4th PIPE_CONTROL
    timeStampAddress = mockExCmdBuffer->timestamps->getGpuAddress() + 3 * sizeof(uint64_t);
    expectedTsAddress = static_cast<uint32_t>(timeStampAddress & 0x0000FFFFFFFFULL);
    expectedTsAddressHigh = static_cast<uint32_t>(timeStampAddress >> 32);
    ++it;
    ASSERT_NE(end, it);
    pipeControl = genCmdCast<PIPE_CONTROL *>(*it);
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_EQ(1u, pipeControl->getCommandStreamerStallEnable());
    EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pipeControl->getPostSyncOperation());
    EXPECT_EQ(expectedTsAddress, pipeControl->getAddress());
    EXPECT_EQ(expectedTsAddressHigh, pipeControl->getAddressHigh());
}

// When reusable allocations are already stored in the CSR's internal
// allocation storage, the experimental linear stream is expected to be backed
// by the most recently stored one.
HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenMemoryManagerAlreadyStoresAllocationThenUseItForLinearSteam) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    auto storage = commandStreamReceiver.getInternalAllocationStorage();
    commandStreamReceiver.storeMakeResidentAllocations = true;
    MemoryManager *memoryManager = commandStreamReceiver.getMemoryManager();

    //Make two allocations, since CSR will try to reuse it also
    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{3 * MemoryConstants::pageSize});
    storage->storeAllocation(std::unique_ptr<GraphicsAllocation>(allocation), REUSABLE_ALLOCATION);
    allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{3 * MemoryConstants::pageSize});
    storage->storeAllocation(std::unique_ptr<GraphicsAllocation>(allocation), REUSABLE_ALLOCATION);

    MockExperimentalCommandBuffer *mockExCmdBuffer = static_cast<MockExperimentalCommandBuffer *>(commandStreamReceiver.experimentalCmdBuffer.get());

    flushTask(commandStreamReceiver);

    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream.get());
    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream->getGraphicsAllocation());
    // `allocation` is only observed here; ownership was handed to the storage above.
    EXPECT_EQ(allocation->getUnderlyingBuffer(), mockExCmdBuffer->currentStream->getGraphicsAllocation()->getUnderlyingBuffer());
    EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->currentStream->getGraphicsAllocation()));
}

// When the experimental linear stream has almost no space left, the next flush
// is expected to switch to a different graphics allocation, and the new
// MI_BATCH_BUFFER_START must point at that new allocation.
HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenLinearStreamIsExhaustedThenStoreOldAllocationForReuseAndObtainNewAllocationForLinearStream) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    commandStreamReceiver.storeMakeResidentAllocations = true;
    MockExperimentalCommandBuffer *mockExCmdBuffer = static_cast<MockExperimentalCommandBuffer *>(commandStreamReceiver.experimentalCmdBuffer.get());

    flushTask(commandStreamReceiver);
    size_t csrCmdBufferOffset = commandStreamReceiver.commandStream.getUsed();

    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream.get());
    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream->getGraphicsAllocation());
    // Both the host object address and the GPU address of the old allocation
    // are recorded for comparison after the second flush.
    uintptr_t oldCmdBufferAddress = reinterpret_cast<uintptr_t>(mockExCmdBuffer->currentStream->getGraphicsAllocation());
    uint64_t oldExCmdBufferGpuAddr = mockExCmdBuffer->currentStream->getGraphicsAllocation()->getGpuAddress();

    //leave space for single DWORD
    mockExCmdBuffer->currentStream->getSpace(mockExCmdBuffer->currentStream->getAvailableSpace() - sizeof(uint32_t));

    HardwareParse hwParserCsr;
    hwParserCsr.parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
    GenCmdList bbList = hwParserCsr.getCommandsList();
    GenCmdList::iterator it = bbList.begin();
    ASSERT_NE(bbList.end(), it);
    MI_BATCH_BUFFER_START *bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(*it);
    ASSERT_NE(nullptr, bbStart);
    EXPECT_EQ(oldExCmdBufferGpuAddr, bbStart->getBatchBufferStartAddressGraphicsaddress472());
    EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());

    flushTask(commandStreamReceiver);

    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream.get());
    ASSERT_NE(nullptr, mockExCmdBuffer->currentStream->getGraphicsAllocation());
    EXPECT_TRUE(commandStreamReceiver.isMadeResident(mockExCmdBuffer->currentStream->getGraphicsAllocation()));
    uintptr_t newCmdBufferAddress = reinterpret_cast<uintptr_t>(mockExCmdBuffer->currentStream->getGraphicsAllocation());
    uint64_t newExCmdBufferGpuAddr = mockExCmdBuffer->currentStream->getGraphicsAllocation()->getGpuAddress();
    EXPECT_NE(oldCmdBufferAddress, newCmdBufferAddress);
    EXPECT_NE(oldExCmdBufferGpuAddr, newExCmdBufferGpuAddr);

    // Reset the parser, then parse only the commands added by the second flush.
    hwParserCsr.TearDown();
    hwParserCsr.parseCommands<FamilyType>(commandStreamReceiver.commandStream, csrCmdBufferOffset);
    bbList = hwParserCsr.getCommandsList();
    it = bbList.begin();
    ASSERT_NE(bbList.end(), it);
    bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(*it);
    ASSERT_NE(nullptr, bbStart);
    EXPECT_EQ(newExCmdBufferGpuAddr, bbStart->getBatchBufferStartAddressGraphicsaddress472());
    EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
}

// With the debug flag set, the command stream receiver owns a non-null
// experimental command buffer.
HWTEST_F(ExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCommandStreamReceiverIsCreatedThenExperimentalCmdBufferIsNotNull) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    EXPECT_NE(nullptr, commandStreamReceiver.experimentalCmdBuffer.get());
}

// After a flush, replacing the experimental command buffer with null is
// expected to produce some output on stdout.
HWTEST_F(ExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCommandStreamReceiverIsFlushedThenExpectPrintAfterDtor) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    flushTask(commandStreamReceiver);

    //forced dtor to get printed timestamps
    testing::internal::CaptureStdout();
    commandStreamReceiver.setExperimentalCmdBuffer(nullptr);
    std::string output = testing::internal::GetCapturedStdout();
    EXPECT_STRNE(output.c_str(), "");
}

// Without a flush, replacing the experimental command buffer with null is
// expected to produce no output on stdout.
HWTEST_F(ExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhenCommandStreamReceiverIsNotFlushedThenExpectNoPrintAfterDtor) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

    //forced dtor to try to get printed timestamps
    testing::internal::CaptureStdout();
    commandStreamReceiver.setExperimentalCmdBuffer(nullptr);
    std::string output = testing::internal::GetCapturedStdout();
    EXPECT_STREQ(output.c_str(), "");
}