Fix CFE programming when a USM transfer is required

Resolves: NEO-6288

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2021-10-22 11:22:02 +00:00
committed by Compute-Runtime-Automation
parent f1e839bda2
commit dbeb263868
2 changed files with 87 additions and 11 deletions

View File

@@ -151,6 +151,13 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
size_t totalCmdBuffers = 0;
uint32_t perThreadScratchSpaceSize = 0;
NEO::PageFaultManager *pageFaultManager = nullptr;
if (performMigration) {
pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager();
if (pageFaultManager == nullptr) {
performMigration = false;
}
}
for (auto i = 0u; i < numCommandLists; i++) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
@@ -189,6 +196,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
}
partitionCount = std::max(partitionCount, commandList->partitionCount);
commandList->csr = csr;
commandList->makeResidentAndMigrate(performMigration);
}
size_t linearStreamSizeEstimate = totalCmdBuffers * sizeof(MI_BATCH_BUFFER_START);
@@ -337,14 +346,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
}
}
NEO::PageFaultManager *pageFaultManager = nullptr;
if (performMigration) {
pageFaultManager = device->getDriverHandle()->getMemoryManager()->getPageFaultManager();
if (pageFaultManager == nullptr) {
performMigration = false;
}
}
for (auto i = 0u; i < numCommandLists; ++i) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
auto cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations();
@@ -399,9 +400,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
printfFunctionContainer.insert(printfFunctionContainer.end(),
commandList->getPrintfFunctionContainer().begin(),
commandList->getPrintfFunctionContainer().end());
commandList->csr = csr;
commandList->makeResidentAndMigrate(performMigration);
}
if (performMigration) {

View File

@@ -20,6 +20,7 @@
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/mocks/mock_memory_operations_handler.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "test.h"
@@ -2030,5 +2031,82 @@ TEST_F(CommandQueueCreate, givenOverrideCmdQueueSyncModeToSynchronousWhenCommand
commandQueue->destroy();
}
// Test fixture built on DeviceFixture that turns on the local-memory and
// CPU+GPU shared-allocation storage debug flags before the device is set up.
struct DeviceWithDualStorage : Test<DeviceFixture> {
    // Declared as a member so it lives for the whole test.
    // NOTE(review): presumably restores the DebugManager flags changed in SetUp() on destruction - confirm.
    DebugManagerStateRestore restorer;

    void SetUp() override {
        // Guard is scoped to SetUp() only; it is released as soon as the base fixture is set up.
        NEO::MockCompilerEnableGuard compilerGuard(true);

        // Both flags have to be set before DeviceFixture::SetUp() runs.
        DebugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.set(1);
        DebugManager.flags.EnableLocalMemory.set(1);

        DeviceFixture::SetUp();
    }

    void TearDown() override {
        DeviceFixture::TearDown();
    }
};
// Given a command list with one appended kernel whose residency container holds
// the GPU allocation of a shared (USM) allocation,
// when the list is executed on a command queue with the last argument set to true
// (NOTE(review): presumably the performMigration flag - confirm against the signature),
// then both the command queue stream and the page-fault command queue stream grow,
// no CFE_STATE is found in the command queue stream and exactly one is found in the
// page-fault command queue stream.
HWTEST2_F(DeviceWithDualStorage, givenCmdListWithAppendedKernelAndUsmTransferAndBlitterDisabledWhenExecuteCmdListThenCfeStateOnceProgrammed, IsAtLeastXeHpCore) {
    using CFE_STATE = typename FamilyType::CFE_STATE;
    neoDevice->executionEnvironment->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique<MockMemoryOperationsHandler>();

    ze_result_t res = ZE_RESULT_SUCCESS;
    const ze_command_queue_desc_t desc = {};
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
                                                           device,
                                                           neoDevice->getInternalEngine().commandStreamReceiver,
                                                           &desc,
                                                           false,
                                                           false,
                                                           res));
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    ASSERT_NE(nullptr, commandQueue);

    auto commandList = std::unique_ptr<CommandList>(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, res)));
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    ASSERT_NE(nullptr, commandList);

    Mock<Kernel> kernel;
    kernel.immutableData.device = device;

    size_t size = 10;
    size_t alignment = 1u;
    void *ptr = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    res = context->allocSharedMem(device->toHandle(),
                                  &deviceDesc,
                                  &hostDesc,
                                  size, alignment, &ptr);
    // Fatal check: ptr is dereferenced through the SVM lookup right below.
    ASSERT_EQ(ZE_RESULT_SUCCESS, res);

    auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
    kernel.residencyContainer.push_back(gpuAlloc);

    ze_group_count_t dispatchFunctionArguments{1, 1, 1};
    res = commandList->appendLaunchKernel(kernel.toHandle(), &dispatchFunctionArguments, nullptr, 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    auto deviceImp = static_cast<DeviceImp *>(device);
    auto pageFaultCmdQueue = whitebox_cast(deviceImp->pageFaultCommandList->cmdQImmediate);

    auto sizeBefore = commandQueue->commandStream->getUsed();
    auto pageFaultSizeBefore = pageFaultCmdQueue->commandStream->getUsed();

    auto handle = commandList->toHandle();
    res = commandQueue->executeCommandLists(1, &handle, nullptr, true);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    auto sizeAfter = commandQueue->commandStream->getUsed();
    auto pageFaultSizeAfter = pageFaultCmdQueue->commandStream->getUsed();
    EXPECT_LT(sizeBefore, sizeAfter);
    EXPECT_LT(pageFaultSizeBefore, pageFaultSizeAfter);

    // Each stream is parsed into its own container so the two CFE_STATE counts
    // cannot influence each other.
    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(commandQueue->commandStream->getCpuBase(), 0),
                                             sizeAfter);
    auto count = findAll<CFE_STATE *>(commands.begin(), commands.end()).size();
    EXPECT_EQ(0u, count);

    GenCmdList pageFaultCommands;
    CmdParse<FamilyType>::parseCommandBuffer(pageFaultCommands, ptrOffset(pageFaultCmdQueue->commandStream->getCpuBase(), 0),
                                             pageFaultSizeAfter);
    count = findAll<CFE_STATE *>(pageFaultCommands.begin(), pageFaultCommands.end()).size();
    EXPECT_EQ(1u, count);

    // Non-fatal check so the queue below is always destroyed, even if freeing fails.
    res = context->freeMem(ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    commandQueue->destroy();
}
} // namespace ult
} // namespace L0