/* * Copyright (C) 2019-2020 Intel Corporation * * SPDX-License-Identifier: MIT * */ #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/helpers/hw_parse.h" #include "opencl/test/unit_test/helpers/unit_test_helper.h" #include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "opencl/test/unit_test/mocks/mock_timestamp_container.h" #include "test.h" using namespace NEO; /* Test fixture for the blitter-based aux translation tests below. It owns a mock device, a mock context with its own BCS command stream receiver, a mock command queue and a mock kernel, and offers helpers to parse command streams and to locate/verify individual commands. NOTE(review): several declarations in this text read as if their template parameter/argument lists were lost (e.g. "template void SetUpT()", "std::unique_ptr bcsOsContext", "static_cast(...)", "find(...)", "genCmdCast(...)"); confirm exact types against the upstream file. */ struct BlitAuxTranslationTests : public ::testing::Test { /* MockContext variant that creates its own ENGINE_BCS OsContext and its own command stream receiver, sets the receiver up on that context and initializes its tag allocation. */ class BcsMockContext : public MockContext { public: BcsMockContext(ClDevice *device) : MockContext(device) { bcsOsContext.reset(OsContext::create(nullptr, 0, 0, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false, false, false)); bcsCsr.reset(createCommandStream(*device->getExecutionEnvironment(), device->getRootDeviceIndex())); bcsCsr->setupContext(*bcsOsContext); bcsCsr->initializeTagAllocation(); } /* Builds HostPtrToBuffer blit properties copying `size` bytes from `hostPtr` to `memory` (destination GPU address taken from memory->getGpuAddress(), all offsets 0), submits them through bcsCsr->blitBuffer and always returns BlitOperationResult::Success. `memObj` is not used. */ BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, size_t size) const override { auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, memory, nullptr, hostPtr, memory->getGpuAddress(), 0, 0, 0, size); BlitPropertiesContainer container; container.push_back(blitProperties); bcsCsr->blitBuffer(container, true); return BlitOperationResult::Success; } std::unique_ptr bcsOsContext; std::unique_ptr bcsCsr; }; /* Per-test setup. Skips the test on 32-bit builds or when the HwHelper reports that aux resolves are not required. Otherwise sets the debug flags EnableTimestampPacket, EnableBlitterOperationsForReadWriteBuffers and ForceAuxTranslationMode to 1 and CsrDispatchMode to ImmediateDispatch, creates the mock device, forces blitterOperationsSupported in the capability table, and builds the context, command queue and kernel (with auxTranslationRequired = true and non-uniform work-groups allowed). Finally caches the queue's gpgpu and bcs command stream receivers. */ template void SetUpT() { auto &hwHelper = HwHelper::get(platformDevices[0]->platform.eRenderCoreFamily); if (is32bit || !hwHelper.requiresAuxResolves()) { GTEST_SKIP(); } DebugManager.flags.EnableTimestampPacket.set(1); DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1); 
DebugManager.flags.ForceAuxTranslationMode.set(1); DebugManager.flags.CsrDispatchMode.set(static_cast(DispatchMode::ImmediateDispatch)); device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(nullptr)); auto &capabilityTable = device->getRootDeviceEnvironment().getMutableHardwareInfo()->capabilityTable; bool createBcsEngine = !capabilityTable.blitterOperationsSupported; capabilityTable.blitterOperationsSupported = true; /* Only when the capability table did not already report blitter support: re-point the low-priority engine at a freshly created ENGINE_BCS OsContext owned by the fixture. */ if (createBcsEngine) { auto &engine = device->getEngine(HwHelperHw::lowPriorityEngineType, true); bcsOsContext.reset(OsContext::create(nullptr, 1, 0, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false, false, false)); engine.osContext = bcsOsContext.get(); engine.commandStreamReceiver->setupContext(*bcsOsContext); } bcsMockContext = std::make_unique(device.get()); auto mockCmdQueue = new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr); commandQueue.reset(mockCmdQueue); mockKernel = std::make_unique(*device, bcsMockContext.get()); mockKernel->mockKernel->auxTranslationRequired = true; auto mockProgram = mockKernel->mockProgram; mockProgram->setAllowNonUniform(true); gpgpuCsr = mockCmdQueue->gpgpuEngine->commandStreamReceiver; bcsCsr = mockCmdQueue->bcsEngine->commandStreamReceiver; } /* Per-test teardown; intentionally empty, the fixture members clean up on destruction. */ template void TearDownT() {} /* Sets every buffer in `buffers` as a kernel argument of the mock kernel: grows kernelArgInfo when it has fewer entries than `buffers`, initializes the kernel, then for each index gives the arg one patch-info entry, marks it as not pure-stateful and calls setArgBuffer with the cl_mem handle. */ template void setMockKernelArgs(std::array buffers) { if (mockKernel->kernelInfo.kernelArgInfo.size() < buffers.size()) { mockKernel->kernelInfo.kernelArgInfo.resize(buffers.size()); } mockKernel->mockKernel->initialize(); for (uint32_t i = 0; i < buffers.size(); i++) { cl_mem clMem = buffers[i]; mockKernel->kernelInfo.kernelArgInfo.at(i).kernelArgPatchInfoVector.resize(1); mockKernel->kernelInfo.kernelArgInfo.at(i).pureStatefulBufferAccess = false; mockKernel->mockKernel->setArgBuffer(i, sizeof(cl_mem *), &clMem); } } /* Creates a CL_MEM_READ_WRITE buffer of `size` bytes in the fixture's context (status is written to the retVal member) and overrides its graphics allocation type to BUFFER_COMPRESSED when `compressed` is true, otherwise to BUFFER. */ ReleaseableObjectPtr createBuffer(size_t size, bool compressed) { auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, size, nullptr, retVal)); if (compressed) 
{ buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); } else { buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER); } return buffer; } /* Parses `linearStream` with a local HardwareParse and returns a copy of the resulting command list. */ template GenCmdList getCmdList(LinearStream &linearStream) { HardwareParse hwParser; hwParser.parseCommands(linearStream); return hwParser.cmdList; } /* Searches [itorStart, itorEnd) for a pipe control whose post-sync operation is WRITE_IMMEDIATE_DATA and which has command-streamer stall enabled, skipping pipe controls that do not match. Returns an iterator to the match; when none is found it records a test failure (EXPECT_TRUE(false)) and returns itorEnd. */ template GenCmdList::iterator expectPipeControl(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) { using PIPE_CONTROL = typename Family::PIPE_CONTROL; PIPE_CONTROL *pipeControlCmd = nullptr; GenCmdList::iterator commandItor = itorStart; bool stallingWrite = false; do { commandItor = find(commandItor, itorEnd); if (itorEnd == commandItor) { EXPECT_TRUE(false); return itorEnd; } pipeControlCmd = genCmdCast(*commandItor); stallingWrite = pipeControlCmd->getPostSyncOperation() == PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA && pipeControlCmd->getCommandStreamerStallEnable(); ++commandItor; } while (!stallingWrite); return --commandItor; } /* Searches [itorStart, itorEnd) for a flush command whose destination address is non-zero, skipping flushes without one. Returns an iterator to the match; when none is found it records a test failure and returns itorEnd. */ template GenCmdList::iterator expectMiFlush(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) { Family *miFlushCmd = nullptr; GenCmdList::iterator commandItor = itorStart; bool miFlushWithMemoryWrite = false; do { commandItor = find(commandItor, itorEnd); if (itorEnd == commandItor) { EXPECT_TRUE(false); return itorEnd; } miFlushCmd = genCmdCast(*commandItor); miFlushWithMemoryWrite = miFlushCmd->getDestinationAddress() != 0; ++commandItor; } while (!miFlushWithMemoryWrite); return --commandItor; } /* Finds the next command of the requested type in [itorStart, itorEnd), expects that one was found and returns the iterator (itorEnd on failure, so callers that dereference it rely on the expectation having passed). */ template GenCmdList::iterator expectCommand(GenCmdList::iterator itorStart, GenCmdList::iterator itorEnd) { auto commandItor = find(itorStart, itorEnd); EXPECT_TRUE(commandItor != itorEnd); return commandItor; } /* Casts the command at `semaphoreItor` and expects its semaphore graphics address to equal `expectedAddress`. The cast result is dereferenced without a null check. */ template void verifySemaphore(GenCmdList::iterator &semaphoreItor, uint64_t expectedAddress) { using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT; auto semaphoreCmd = 
genCmdCast(*semaphoreItor); EXPECT_EQ(expectedAddress, semaphoreCmd->getSemaphoreGraphicsAddress()); } /* Fixture state. gws/lws are the global/local work sizes passed to enqueueKernel by the tests; bcsCsr and gpgpuCsr are non-owning pointers filled in by SetUpT. */ DebugManagerStateRestore restore; std::unique_ptr bcsOsContext; std::unique_ptr device; std::unique_ptr bcsMockContext; std::unique_ptr commandQueue; std::unique_ptr mockKernel; CommandStreamReceiver *bcsCsr = nullptr; CommandStreamReceiver *gpgpuCsr = nullptr; size_t gws[3] = {63, 0, 0}; size_t lws[3] = {16, 0, 0}; uint32_t hostPtr = 0; cl_int retVal = CL_SUCCESS; }; /* Enqueues a kernel with three buffer args (two compressed, one not), expects the queue's bcsTaskCount to grow by one, and then walks the gpgpu CSR stream, the queue stream and the BCS stream expecting commands in the order named by the inline comments (barrier, aux-to-nonaux, walker / wait for NDR, nonaux-to-aux, task count). */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenConstructingCommandBufferThenEnsureCorrectOrder) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, false); auto buffer2 = createBuffer(1, true); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); auto mockCmdQ = static_cast *>(commandQueue.get()); auto initialBcsTaskCount = mockCmdQ->bcsTaskCount; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr); EXPECT_EQ(mockCmdQ->bcsTaskCount, initialBcsTaskCount + 1); // Gpgpu command buffer { auto cmdListCsr = getCmdList(gpgpuCsr->getCS(0)); auto cmdListQueue = getCmdList(commandQueue->getCS(0)); // Barrier expectPipeControl(cmdListCsr.begin(), cmdListCsr.end()); // Aux to NonAux auto cmdFound = expectCommand(cmdListQueue.begin(), cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // Walker cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // task count expectPipeControl(++cmdFound, cmdListQueue.end()); 
} // BCS command buffer { auto cmdList = getCmdList(bcsCsr->getCS(0)); // Barrier auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // Aux to NonAux cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); // wait for NDR cmdFound = expectCommand(++cmdFound, cmdList.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); // taskCount expectCommand(++cmdFound, cmdList.end()); } } /* Same ordering check as the previous test, but the enqueue is blocked on a UserEvent that is completed afterwards; the queue-side commands are read from the gpgpu CSR's lastFlushedCommandStream, and the queue is expected to be unblocked at the end. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenConstructingBlockedCommandBufferThenEnsureCorrectOrder) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, false); auto buffer2 = createBuffer(1, true); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); auto mockCmdQ = static_cast *>(commandQueue.get()); auto initialBcsTaskCount = mockCmdQ->bcsTaskCount; UserEvent userEvent; cl_event waitlist[] = {&userEvent}; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, lws, 1, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); EXPECT_EQ(mockCmdQ->bcsTaskCount, initialBcsTaskCount + 1); // Gpgpu command buffer { auto cmdListCsr = getCmdList(gpgpuCsr->getCS(0)); auto ultCsr = static_cast *>(gpgpuCsr); auto cmdListQueue = getCmdList(*ultCsr->lastFlushedCommandStream); // Barrier expectPipeControl(cmdListCsr.begin(), cmdListCsr.end()); // Aux to NonAux auto cmdFound = 
expectCommand(cmdListQueue.begin(), cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // Walker cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // task count expectPipeControl(++cmdFound, cmdListQueue.end()); } // BCS command buffer { auto cmdList = getCmdList(bcsCsr->getCS(0)); // Barrier auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // Aux to NonAux cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); // wait for NDR cmdFound = expectCommand(++cmdFound, cmdList.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); cmdFound = expectCommand(++cmdFound, cmdList.end()); // taskCount expectCommand(++cmdFound, cmdList.end()); } EXPECT_FALSE(mockCmdQ->isQueueBlocked()); } /* Locates the stalling post-sync-write pipe control in the gpgpu CSR stream, rebuilds its 64-bit write address from getAddress()/getAddressHigh(), and verifies via verifySemaphore that the first matching command in the BCS stream waits on that address. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeBarrier) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto cmdListCsr = getCmdList(gpgpuCsr->getCS(0)); auto pipeControl = expectPipeControl(cmdListCsr.begin(), cmdListCsr.end()); auto pipeControlCmd = genCmdCast(*pipeControl); uint64_t low = pipeControlCmd->getAddress(); uint64_t high = pipeControlCmd->getAddressHigh(); uint64_t barrierGpuAddress = (high << 32) | low; auto cmdList = getCmdList(bcsCsr->getCS(0)); 
auto semaphore = expectCommand(cmdList.begin(), cmdList.end()); verifySemaphore(semaphore, barrierGpuAddress); } /* With two compressed buffers: collects the destination addresses of the memory-writing flush commands in the BCS stream (two before and two after an intermediate command), then verifies via verifySemaphore that the queue stream checks those four addresses in the same order, two before and two after the walker. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeBcsOutput) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, true); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get()}}); commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); uint64_t auxToNonAuxOutputAddress[2] = {}; uint64_t nonAuxToAuxOutputAddress[2] = {}; { auto cmdListBcs = getCmdList(bcsCsr->getCS(0)); auto cmdFound = expectCommand(cmdListBcs.begin(), cmdListBcs.end()); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); auto miflushDwCmd = genCmdCast(*cmdFound); auxToNonAuxOutputAddress[0] = miflushDwCmd->getDestinationAddress(); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); auxToNonAuxOutputAddress[1] = miflushDwCmd->getDestinationAddress(); cmdFound = expectCommand(++cmdFound, cmdListBcs.end()); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); nonAuxToAuxOutputAddress[0] = miflushDwCmd->getDestinationAddress(); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); nonAuxToAuxOutputAddress[1] = miflushDwCmd->getDestinationAddress(); } { auto cmdListQueue = getCmdList(commandQueue->getCS(0)); // Aux to NonAux auto cmdFound = expectCommand(cmdListQueue.begin(), cmdListQueue.end()); verifySemaphore(cmdFound, auxToNonAuxOutputAddress[0]); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, auxToNonAuxOutputAddress[1]); // Walker cmdFound = expectCommand(++cmdFound, 
cmdListQueue.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, nonAuxToAuxOutputAddress[0]); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, nonAuxToAuxOutputAddress[1]); } } /* Forces isCacheFlushForBcsRequired to report false through the mock queue override, then verifies that the command found in the BCS stream after the first aux-to-nonaux command waits on the contextEnd field of packet 0 of the kernel's timestamp packet node. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeKernel) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); auto mockCmdQ = static_cast *>(commandQueue.get()); mockCmdQ->overrideIsCacheFlushForBcsRequired.enabled = true; mockCmdQ->overrideIsCacheFlushForBcsRequired.returnValue = false; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); auto kernelNode = mockCmdQ->timestampPacketContainer->peekNodes()[0]; auto kernelNodeAddress = kernelNode->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); auto cmdList = getCmdList(bcsCsr->getCS(0)); // Aux to nonAux auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // semaphore before NonAux to Aux auto semaphore = expectCommand(++cmdFound, cmdList.end()); verifySemaphore(semaphore, kernelNodeAddress); } /* Forces isCacheFlushForBcsRequired to report true, then finds in the queue stream (after the first expected command) a stalling post-sync-write pipe control with DC flush enabled, stepping over one that lacks DC flush. Its non-zero write address must be the address that the BCS-side command checked with verifySemaphore waits on. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeCacheFlush) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); auto mockCmdQ = static_cast *>(commandQueue.get()); mockCmdQ->overrideIsCacheFlushForBcsRequired.enabled = true; mockCmdQ->overrideIsCacheFlushForBcsRequired.returnValue = true; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, 
nullptr, 0, nullptr, nullptr); auto cmdListBcs = getCmdList(bcsCsr->getCS(0)); auto cmdListQueue = getCmdList(mockCmdQ->getCS(0)); uint64_t cacheFlushWriteAddress = 0; { auto cmdFound = expectCommand(cmdListQueue.begin(), cmdListQueue.end()); cmdFound = expectPipeControl(++cmdFound, cmdListQueue.end()); auto pipeControlCmd = genCmdCast(*cmdFound); if (!pipeControlCmd->getDcFlushEnable()) { // skip pipe control with TimestampPacket write cmdFound = expectPipeControl(++cmdFound, cmdListQueue.end()); pipeControlCmd = genCmdCast(*cmdFound); } EXPECT_TRUE(pipeControlCmd->getDcFlushEnable()); EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); uint64_t low = pipeControlCmd->getAddress(); uint64_t high = pipeControlCmd->getAddressHigh(); cacheFlushWriteAddress = (high << 32) | low; EXPECT_NE(0u, cacheFlushWriteAddress); } { // Aux to nonAux auto cmdFound = expectCommand(cmdListBcs.begin(), cmdListBcs.end()); // semaphore before NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListBcs.end()); verifySemaphore(cmdFound, cacheFlushWriteAddress); } } /* Passes an event carrying one timestamp packet node in the wait list and verifies that, after the barrier command, the BCS stream contains a command waiting on the contextEnd field of packet 0 of that node, followed by two further expected commands. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingCommandBufferThenSynchronizeEvents) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); auto event = make_releaseable(commandQueue.get(), CL_COMMAND_READ_BUFFER, 0, 0); MockTimestampPacketContainer eventDependencyContainer(*bcsCsr->getTimestampPacketAllocator(), 1); auto eventDependency = eventDependencyContainer.getNode(0); event->addTimestampPacketNodes(eventDependencyContainer); cl_event clEvent[] = {event.get()}; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, clEvent, nullptr); auto eventDependencyAddress = eventDependency->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); auto cmdList = getCmdList(bcsCsr->getCS(0)); // 
Barrier auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // Event auto semaphore = expectCommand(++cmdFound, cmdList.end()); verifySemaphore(semaphore, eventDependencyAddress); cmdFound = expectCommand(++semaphore, cmdList.end()); expectCommand(++cmdFound, cmdList.end()); } /* Requests an output event for an enqueue with three buffer args (two compressed), expects the event to hold three timestamp packet nodes, and verifies that the two commands checked after the first expected command in the queue stream wait on the contextEnd field of packet 0 of nodes [1] and [2]. Releases the output event at the end. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenOutEventWhenDispatchingThenAssignNonAuxNodes) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, false); auto buffer2 = createBuffer(1, true); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); cl_event clEvent; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, &clEvent); auto event = castToObject(clEvent); auto &eventNodes = event->getTimestampPacketNodes()->peekNodes(); EXPECT_EQ(3u, eventNodes.size()); auto cmdListQueue = getCmdList(commandQueue->getCS(0)); auto cmdFound = expectCommand(cmdListQueue.begin(), cmdListQueue.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); auto eventNodeAddress = eventNodes[1]->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); verifySemaphore(cmdFound, eventNodeAddress); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); eventNodeAddress = eventNodes[2]->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); verifySemaphore(cmdFound, eventNodeAddress); clReleaseEvent(clEvent); } /* With the cache-flush override reporting false: stores the MultiDispatchInfo of the enqueue, expects its first and last DispatchInfo to differ, and expects the estimated command size to equal 2 * getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue() for the first dispatch's init commands and the last dispatch's epilogue commands, and 0 for the first's epilogue and the last's init commands. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWhenDispatchingThenEstimateCmdBufferSize) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto &hwInfo = device->getHardwareInfo(); auto mockCmdQ = static_cast *>(commandQueue.get()); mockCmdQ->overrideIsCacheFlushForBcsRequired.enabled = true; mockCmdQ->overrideIsCacheFlushForBcsRequired.returnValue 
= false; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, false); auto buffer2 = createBuffer(1, true); MemObjsForAuxTranslation memObjects; memObjects.insert(buffer0.get()); memObjects.insert(buffer2.get()); size_t numBuffersToEstimate = 2; size_t dependencySize = numBuffersToEstimate * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); mockCmdQ->storeMultiDispatchInfo = true; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr); MultiDispatchInfo &multiDispatchInfo = mockCmdQ->storedMultiDispatchInfo; DispatchInfo *firstDispatchInfo = multiDispatchInfo.begin(); DispatchInfo *lastDispatchInfo = &(*multiDispatchInfo.rbegin()); EXPECT_NE(firstDispatchInfo, lastDispatchInfo); // walker split EXPECT_EQ(dependencySize, firstDispatchInfo->dispatchInitCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); EXPECT_EQ(0u, firstDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); EXPECT_EQ(0u, lastDispatchInfo->dispatchInitCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); EXPECT_EQ(dependencySize, lastDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); } /* Same estimate check with the cache-flush override reporting true: the only expectation that changes is the last dispatch's epilogue estimate, which must additionally include getSizeForPipeControlWithPostSyncOperation(hwInfo). */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWithRequiredCacheFlushWhenDispatchingThenEstimateCmdBufferSize) { using WALKER_TYPE = typename FamilyType::WALKER_TYPE; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto &hwInfo = device->getHardwareInfo(); auto mockCmdQ = static_cast *>(commandQueue.get()); mockCmdQ->overrideIsCacheFlushForBcsRequired.enabled = true; mockCmdQ->overrideIsCacheFlushForBcsRequired.returnValue = true; auto buffer0 = createBuffer(1, true); auto buffer1 
= createBuffer(1, false); auto buffer2 = createBuffer(1, true); MemObjsForAuxTranslation memObjects; memObjects.insert(buffer0.get()); memObjects.insert(buffer2.get()); size_t numBuffersToEstimate = 2; size_t dependencySize = numBuffersToEstimate * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); size_t cacheFlushSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); mockCmdQ->storeMultiDispatchInfo = true; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, lws, 0, nullptr, nullptr); MultiDispatchInfo &multiDispatchInfo = mockCmdQ->storedMultiDispatchInfo; DispatchInfo *firstDispatchInfo = multiDispatchInfo.begin(); DispatchInfo *lastDispatchInfo = &(*multiDispatchInfo.rbegin()); EXPECT_NE(firstDispatchInfo, lastDispatchInfo); // walker split EXPECT_EQ(dependencySize, firstDispatchInfo->dispatchInitCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); EXPECT_EQ(0u, firstDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); EXPECT_EQ(0u, lastDispatchInfo->dispatchInitCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); EXPECT_EQ(dependencySize + cacheFlushSize, lastDispatchInfo->dispatchEpilogueCommands.estimateCommandsSize(memObjects.size(), hwInfo, mockCmdQ->isCacheFlushForBcsRequired())); } /* Blocked-enqueue variant of the barrier synchronization test: the kernel waits on a UserEvent that is completed after the enqueue; the BCS-side command must wait on the write address of the stalling pipe control in the gpgpu CSR stream, and the queue must end up unblocked. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingBlockedCommandBufferThenSynchronizeBarrier) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); UserEvent userEvent; cl_event waitlist[] = {&userEvent}; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, 
nullptr, 1, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); auto cmdListCsr = getCmdList(gpgpuCsr->getCS(0)); auto pipeControl = expectPipeControl(cmdListCsr.begin(), cmdListCsr.end()); auto pipeControlCmd = genCmdCast(*pipeControl); uint64_t low = pipeControlCmd->getAddress(); uint64_t high = pipeControlCmd->getAddressHigh(); uint64_t barrierGpuAddress = (high << 32) | low; auto cmdList = getCmdList(bcsCsr->getCS(0)); auto semaphore = expectCommand(cmdList.begin(), cmdList.end()); verifySemaphore(semaphore, barrierGpuAddress); EXPECT_FALSE(commandQueue->isQueueBlocked()); } /* Blocked-enqueue variant of the event synchronization test: the wait list holds a UserEvent (completed after the enqueue) plus an event carrying one timestamp packet node; the BCS stream must wait on that node's packet 0 contextEnd address, and the queue must end up unblocked. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingBlockedCommandBufferThenSynchronizeEvents) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); auto event = make_releaseable(commandQueue.get(), CL_COMMAND_READ_BUFFER, 0, 0); MockTimestampPacketContainer eventDependencyContainer(*bcsCsr->getTimestampPacketAllocator(), 1); auto eventDependency = eventDependencyContainer.getNode(0); event->addTimestampPacketNodes(eventDependencyContainer); UserEvent userEvent; cl_event waitlist[] = {&userEvent, event.get()}; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 2, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); auto eventDependencyAddress = eventDependency->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); auto cmdList = getCmdList(bcsCsr->getCS(0)); // Barrier auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // Event auto semaphore = expectCommand(++cmdFound, cmdList.end()); verifySemaphore(semaphore, eventDependencyAddress); cmdFound = expectCommand(++semaphore, cmdList.end()); expectCommand(++cmdFound, cmdList.end()); EXPECT_FALSE(commandQueue->isQueueBlocked()); } /* Blocked-enqueue variant of the kernel synchronization test. No cache-flush override is installed here; when the queue reports that a cache flush for BCS is required, the test advances to the next matching command before verifying that it waits on the kernel node's packet 0 contextEnd address. The queue must end up unblocked. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, 
givenBlitTranslationWhenConstructingBlockedCommandBufferThenSynchronizeKernel) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto buffer = createBuffer(1, true); setMockKernelArgs(std::array{{buffer.get()}}); auto mockCmdQ = static_cast *>(commandQueue.get()); UserEvent userEvent; cl_event waitlist[] = {&userEvent}; mockCmdQ->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); auto kernelNode = mockCmdQ->timestampPacketContainer->peekNodes()[0]; auto kernelNodeAddress = kernelNode->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); auto cmdList = getCmdList(bcsCsr->getCS(0)); // Aux to nonAux auto cmdFound = expectCommand(cmdList.begin(), cmdList.end()); // semaphore before NonAux to Aux auto semaphore = expectCommand(++cmdFound, cmdList.end()); if (mockCmdQ->isCacheFlushForBcsRequired()) { semaphore = expectCommand(++semaphore, cmdList.end()); } verifySemaphore(semaphore, kernelNodeAddress); EXPECT_FALSE(commandQueue->isQueueBlocked()); } /* Blocked-enqueue variant of the BCS output synchronization test: the flush destination addresses collected from the BCS stream must match, in order, the addresses checked in the gpgpu CSR's lastFlushedCommandStream, and the queue must end up unblocked. */ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitTranslationWhenConstructingBlockedCommandBufferThenSynchronizeBcsOutput) { using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT; using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using WALKER_TYPE = typename FamilyType::WALKER_TYPE; auto buffer0 = createBuffer(1, true); auto buffer1 = createBuffer(1, true); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get()}}); UserEvent userEvent; cl_event waitlist[] = {&userEvent}; commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 1, waitlist, nullptr); userEvent.setStatus(CL_COMPLETE); uint64_t auxToNonAuxOutputAddress[2] = {}; uint64_t nonAuxToAuxOutputAddress[2] = {}; { auto cmdListBcs = getCmdList(bcsCsr->getCS(0)); auto cmdFound = expectCommand(cmdListBcs.begin(), 
cmdListBcs.end()); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); auto miflushDwCmd = genCmdCast(*cmdFound); auxToNonAuxOutputAddress[0] = miflushDwCmd->getDestinationAddress(); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); auxToNonAuxOutputAddress[1] = miflushDwCmd->getDestinationAddress(); cmdFound = expectCommand(++cmdFound, cmdListBcs.end()); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); nonAuxToAuxOutputAddress[0] = miflushDwCmd->getDestinationAddress(); cmdFound = expectMiFlush(++cmdFound, cmdListBcs.end()); miflushDwCmd = genCmdCast(*cmdFound); nonAuxToAuxOutputAddress[1] = miflushDwCmd->getDestinationAddress(); } { auto ultCsr = static_cast *>(gpgpuCsr); auto cmdListQueue = getCmdList(*ultCsr->lastFlushedCommandStream); // Aux to NonAux auto cmdFound = expectCommand(cmdListQueue.begin(), cmdListQueue.end()); verifySemaphore(cmdFound, auxToNonAuxOutputAddress[0]); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, auxToNonAuxOutputAddress[1]); // Walker cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); // NonAux to Aux cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, nonAuxToAuxOutputAddress[0]); cmdFound = expectCommand(++cmdFound, cmdListQueue.end()); verifySemaphore(cmdFound, nonAuxToAuxOutputAddress[1]); } EXPECT_FALSE(commandQueue->isQueueBlocked()); }