From dbeb26386877bd6cff37f2fb59966b02876fd59d Mon Sep 17 00:00:00 2001
From: Szymon Morek
Date: Fri, 22 Oct 2021 11:22:02 +0000
Subject: [PATCH] Fix CFE programming when usm transfer required

Resolves: NEO-6288

Signed-off-by: Szymon Morek
---
 .../core/source/cmdqueue/cmdqueue_hw.inl      | 20 +++--
 .../sources/cmdqueue/test_cmdqueue.cpp        | 78 +++++++++++++++++++
 2 files changed, 87 insertions(+), 11 deletions(-)

diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl
index c507f06ad5..e3d0699630 100644
--- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl
+++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl
@@ -151,6 +151,13 @@ ze_result_t CommandQueueHw::executeCommandLists(

     size_t totalCmdBuffers = 0;
     uint32_t perThreadScratchSpaceSize = 0;
+    NEO::PageFaultManager *pageFaultManager = nullptr; // lookup moved earlier in the function; the same statements are removed further down
+    if (performMigration) {
+        pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager();
+        if (pageFaultManager == nullptr) {
+            performMigration = false; // no page fault manager available: the migration request is dropped
+        }
+    }
     for (auto i = 0u; i < numCommandLists; i++) {
         auto commandList = CommandList::fromHandle(phCommandLists[i]);

@@ -189,6 +196,8 @@ ze_result_t CommandQueueHw::executeCommandLists(
         }

         partitionCount = std::max(partitionCount, commandList->partitionCount);
+        commandList->csr = csr; // moved earlier in the function; the same two statements are removed further down
+        commandList->makeResidentAndMigrate(performMigration); // NOTE(review): presumably has to run before this queue's own stream is programmed — confirm
     }

     size_t linearStreamSizeEstimate = totalCmdBuffers * sizeof(MI_BATCH_BUFFER_START);
@@ -337,14 +346,6 @@ ze_result_t CommandQueueHw::executeCommandLists(
         }
     }

-    NEO::PageFaultManager *pageFaultManager = nullptr;
-    if (performMigration) {
-        pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager();
-        if (pageFaultManager == nullptr) {
-            performMigration = false;
-        }
-    }
-
     for (auto i = 0u; i < numCommandLists; ++i) {
         auto commandList = CommandList::fromHandle(phCommandLists[i]);
         auto cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations();
@@ -399,9 +400,6 @@ ze_result_t CommandQueueHw::executeCommandLists(
         printfFunctionContainer.insert(printfFunctionContainer.end(),
                                        commandList->getPrintfFunctionContainer().begin(),
                                        commandList->getPrintfFunctionContainer().end());
-
-        commandList->csr = csr;
-        commandList->makeResidentAndMigrate(performMigration);
     }

     if (performMigration) {
diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp
index b64fa0e509..4f2f32409e 100644
--- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp
+++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp
@@ -20,6 +20,7 @@
 #include "shared/test/common/mocks/mock_command_stream_receiver.h"
 #include "shared/test/common/mocks/mock_graphics_allocation.h"
 #include "shared/test/common/mocks/mock_memory_manager.h"
+#include "shared/test/common/mocks/mock_memory_operations_handler.h"
 #include "shared/test/common/mocks/ult_device_factory.h"
 #include "test.h"

@@ -2030,5 +2031,82 @@ TEST_F(CommandQueueCreate, givenOverrideCmdQueueSyncModeToSynchronousWhenCommand

     commandQueue->destroy();
 }
+
+struct DeviceWithDualStorage : Test { // test fixture; NOTE(review): the base looks like a template whose argument was lost (DeviceFixture is used below) — confirm
+    void SetUp() override {
+        NEO::MockCompilerEnableGuard mock(true);
+        DebugManager.flags.EnableLocalMemory.set(1); // debug flags are set before the device fixture is brought up
+        DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.set(1); // presumably gives shared allocations separate CPU and GPU storage — confirm
+        DeviceFixture::SetUp();
+    }
+    void TearDown() override {
+        DeviceFixture::TearDown();
+    }
+    DebugManagerStateRestore restorer; // presumably restores the debug flags changed in SetUp — confirm
+};
+
+HWTEST2_F(DeviceWithDualStorage, givenCmdListWithAppendedKernelAndUsmTransferAndBlitterDisabledWhenExecuteCmdListThenCfeStateOnceProgrammed, IsAtLeastXeHpCore) { // executes one command list and checks which command stream receives the searched command
+    using CFE_STATE = typename FamilyType::CFE_STATE;
+    neoDevice->executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(); // NOTE(review): make_unique has no template argument here — looks stripped; confirm the intended mock type
+    ze_result_t res = ZE_RESULT_SUCCESS;
+
+    const ze_command_queue_desc_t desc = {};
+    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, // queue is created on the device's internal engine command stream receiver
+                                                           device,
+                                                           neoDevice->getInternalEngine().commandStreamReceiver,
+                                                           &desc,
+                                                           false,
+                                                           false,
+                                                           res));
+    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
+    ASSERT_NE(nullptr, commandQueue);
+
+    auto commandList = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, res))); // NOTE(review): unique_ptr has no element type here — looks stripped; confirm
+    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
+    ASSERT_NE(nullptr, commandList);
+    Mock kernel; // NOTE(review): template argument appears to be missing — confirm
+    kernel.immutableData.device = device;
+    size_t size = 10;
+    size_t alignment = 1u;
+    void *ptr = nullptr;
+    ze_device_mem_alloc_desc_t deviceDesc = {};
+    ze_host_mem_alloc_desc_t hostDesc = {};
+    res = context->allocSharedMem(device->toHandle(), // shared allocation whose GPU allocation is attached to the kernel below
+                                  &deviceDesc,
+                                  &hostDesc,
+                                  size, alignment, &ptr);
+    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
+    auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
+    kernel.residencyContainer.push_back(gpuAlloc); // the kernel now carries the shared allocation's GPU allocation in its residency container
+
+    ze_group_count_t dispatchFunctionArguments{1, 1, 1};
+    commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr);
+    auto deviceImp = static_cast(device); // NOTE(review): the cast's target type is missing — looks stripped; confirm
+    auto pageFaultCmdQueue = whitebox_cast(deviceImp->pageFaultCommandList->cmdQImmediate); // immediate queue behind the device's page-fault command list
+
+    auto sizeBefore = commandQueue->commandStream->getUsed();
+    auto pageFaultSizeBefore = pageFaultCmdQueue->commandStream->getUsed();
+    auto handle = commandList->toHandle();
+    commandQueue->executeCommandLists(1, &handle, nullptr, true); // last argument presumably is performMigration — confirm
+    auto sizeAfter = commandQueue->commandStream->getUsed();
+    auto pageFaultSizeAfter = pageFaultCmdQueue->commandStream->getUsed();
+    EXPECT_LT(sizeBefore, sizeAfter); // both command streams must have grown
+    EXPECT_LT(pageFaultSizeBefore, pageFaultSizeAfter);
+
+    GenCmdList commands;
+    CmdParse::parseCommandBuffer(commands, ptrOffset(commandQueue->commandStream->getCpuBase(), 0),
+                                 sizeAfter);
+    auto count = findAll(commands.begin(), commands.end()).size(); // NOTE(review): findAll's template argument is missing; presumably it searches for CFE_STATE — confirm
+    EXPECT_EQ(0u, count); // nothing matched in this queue's own stream
+
+    CmdParse::parseCommandBuffer(commands, ptrOffset(pageFaultCmdQueue->commandStream->getCpuBase(), 0), // NOTE(review): `commands` is reused without clearing; if parsing appends, the next count spans both streams — confirm
+                                 pageFaultSizeAfter);
+    count = findAll(commands.begin(), commands.end()).size();
+    EXPECT_EQ(1u, count); // exactly one match once the page-fault queue's stream has been parsed
+
+    res = context->freeMem(ptr);
+    ASSERT_EQ(ZE_RESULT_SUCCESS, res);
+    commandQueue->destroy();
+}
 } // namespace ult
 } // namespace L0