Fix makeNonResident for csr residency allocations

Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2021-06-09 12:02:35 +00:00
committed by Compute-Runtime-Automation
parent b363a4f765
commit e1bcad51f0
13 changed files with 72 additions and 40 deletions

View File

@@ -62,7 +62,7 @@ void CommandQueueImp::submitBatchBuffer(size_t offset, NEO::ResidencyContainer &
NEO::QueueThrottle::HIGH, NEO::QueueSliceCount::defaultSliceCount,
commandStream->getUsed(), commandStream, endingCmdPtr, false);
csr->submitBatchBuffer(batchBuffer, residencyContainer);
csr->submitBatchBuffer(batchBuffer, csr->getResidencyAllocations());
buffers.setCurrentFlushStamp(csr->obtainCurrentFlushStamp());
}

View File

@@ -46,8 +46,7 @@ struct CommandQueueHw : public CommandQueueImp {
size_t estimatePipelineSelect();
void programPipelineSelect(NEO::LinearStream &commandStream);
MOCKABLE_VIRTUAL void handleScratchSpace(NEO::ResidencyContainer &residency,
NEO::HeapContainer &heapContainer,
MOCKABLE_VIRTUAL void handleScratchSpace(NEO::HeapContainer &heapContainer,
NEO::ScratchSpaceController *scratchController,
bool &gsbaState, bool &frontEndState,
uint32_t perThreadScratchSpaceSize);

View File

@@ -183,13 +183,12 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
spaceForResidency += residencyContainerSpaceForTagWrite;
residencyContainer.reserve(spaceForResidency);
csr->getResidencyAllocations().reserve(spaceForResidency);
auto scratchSpaceController = csr->getScratchSpaceController();
bool gsbaStateDirty = false;
bool frontEndStateDirty = false;
handleScratchSpace(residencyContainer,
heapContainer,
handleScratchSpace(heapContainer,
scratchSpaceController,
gsbaStateDirty, frontEndStateDirty,
perThreadScratchSpaceSize);
@@ -223,18 +222,18 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
const auto globalFenceAllocation = csr->getGlobalFenceAllocation();
if (globalFenceAllocation) {
residencyContainer.push_back(globalFenceAllocation);
csr->makeResident(*globalFenceAllocation);
}
const auto workPartitionAllocation = csr->getWorkPartitionAllocation();
if (workPartitionAllocation) {
residencyContainer.push_back(workPartitionAllocation);
csr->makeResident(*workPartitionAllocation);
}
if (NEO::DebugManager.flags.EnableSWTags.get()) {
NEO::SWTagsManager *tagsManager = neoDevice->getRootDeviceEnvironment().tagsManager.get();
UNRECOVERABLE_IF(tagsManager == nullptr);
residencyContainer.push_back(tagsManager->getBXMLHeapAllocation());
residencyContainer.push_back(tagsManager->getSWTagHeapAllocation());
csr->makeResident(*tagsManager->getBXMLHeapAllocation());
csr->makeResident(*tagsManager->getSWTagHeapAllocation());
tagsManager->insertBXMLHeapAddress<GfxFamily>(child);
tagsManager->insertSWTagHeapAddress<GfxFamily>(child);
}
@@ -242,7 +241,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
csr->programHardwareContext(child);
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) {
residencyContainer.push_back(device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId()));
csr->makeResident(*device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId()));
}
if (!isCopyOnlyCommandQueue) {
@@ -281,17 +280,17 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
(neoDevice->getDebugger() != nullptr && NEO::Debugger::isDebugEnabled(internalUsage));
if (devicePreemption == NEO::PreemptionMode::MidThread) {
residencyContainer.push_back(csr->getPreemptionAllocation());
csr->makeResident(*csr->getPreemptionAllocation());
}
if (sipKernelUsed) {
auto sipIsa = NEO::SipKernel::getSipKernel(*neoDevice).getSipAllocation();
residencyContainer.push_back(sipIsa);
csr->makeResident(*sipIsa);
}
if (NEO::Debugger::isDebugEnabled(internalUsage) && neoDevice->getDebugger()) {
UNRECOVERABLE_IF(device->getDebugSurface() == nullptr);
residencyContainer.push_back(device->getDebugSurface());
csr->makeResident(*device->getDebugSurface());
}
}
@@ -355,9 +354,9 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
commandList->getPrintfFunctionContainer().end());
for (auto alloc : commandList->commandContainer.getResidencyContainer()) {
if (residencyContainer.end() ==
std::find(residencyContainer.begin(), residencyContainer.end(), alloc)) {
residencyContainer.push_back(alloc);
if (csr->getResidencyAllocations().end() ==
std::find(csr->getResidencyAllocations().begin(), csr->getResidencyAllocations().end(), alloc)) {
csr->makeResident(*alloc);
if (performMigration) {
if (alloc &&
@@ -385,7 +384,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
commandQueuePreemptionMode = statePreemption;
if (hFence) {
residencyContainer.push_back(&fence->getAllocation());
csr->makeResident(fence->getAllocation());
if (isCopyOnlyCommandQueue) {
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(child, fence->getGpuAddress(), Fence::STATE_SIGNALED, false, true);
} else {
@@ -401,7 +400,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
dispatchTaskCountWrite(child, true);
residencyContainer.push_back(csr->getTagAllocation());
csr->makeResident(*csr->getTagAllocation());
void *endingCmd = nullptr;
if (directSubmissionEnabled) {
endingCmd = child.getSpace(0);
@@ -417,17 +416,16 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
memset(paddingPtr, 0, padding);
}
submitBatchBuffer(ptrDiff(child.getCpuBase(), commandStream->getCpuBase()), residencyContainer, endingCmd);
submitBatchBuffer(ptrDiff(child.getCpuBase(), commandStream->getCpuBase()), csr->getResidencyAllocations(), endingCmd);
this->taskCount = csr->peekTaskCount();
csr->makeSurfacePackNonResident(residencyContainer);
csr->makeSurfacePackNonResident(csr->getResidencyAllocations());
if (getSynchronousMode() == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS) {
this->synchronize(std::numeric_limits<uint64_t>::max());
}
this->residencyContainer.clear();
this->heapContainer.clear();
return ZE_RESULT_SUCCESS;

View File

@@ -104,8 +104,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateStateBaseAddressCmdSize() {
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::ResidencyContainer &residency,
NEO::HeapContainer &heapContainer,
void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &heapContainer,
NEO::ScratchSpaceController *scratchController,
bool &gsbaState, bool &frontEndState,
uint32_t perThreadScratchSpaceSize) {
@@ -114,7 +113,7 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::ResidencyContainer &
scratchController->setRequiredScratchSpace(nullptr, 0u, perThreadScratchSpaceSize, 0u, csr->peekTaskCount(),
csr->getOsContext(), gsbaState, frontEndState);
auto scratchAllocation = scratchController->getScratchSpaceAllocation();
residency.push_back(scratchAllocation);
csr->makeResident(*scratchAllocation);
}
}

View File

@@ -96,7 +96,6 @@ struct CommandQueueImp : public CommandQueue {
std::vector<Kernel *> printfFunctionContainer;
bool gpgpuEnabled = false;
CommandBufferManager buffers;
NEO::ResidencyContainer residencyContainer;
NEO::HeapContainer heapContainer;
};

View File

@@ -25,6 +25,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
using BaseClass::device;
using BaseClass::preemptionCmdSyncProgramming;
using BaseClass::printfFunctionContainer;
using BaseClass::submitBatchBuffer;
using BaseClass::synchronizeByPollingForTaskCount;
using CommandQueue::commandQueuePreemptionMode;
using CommandQueue::internalUsage;
@@ -85,6 +86,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
using BaseClass::printfFunctionContainer;
using L0::CommandQueue::internalUsage;
using L0::CommandQueue::preemptionCmdSyncProgramming;
using L0::CommandQueueImp::csr;
// Constructs the mock by forwarding device, csr and desc unchanged to the
// L0::CommandQueueHw<gfxCoreFamily> base constructor; the body adds no extra setup.
MockCommandQueueHw(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : L0::CommandQueueHw<gfxCoreFamily>(device, csr, desc) {
}

View File

@@ -217,6 +217,24 @@ HWTEST_F(CommandQueueCreate, given100CmdListsWhenExecutingThenCommandStreamIsNot
commandQueue->destroy();
}
// Checks that calling submitBatchBuffer on a freshly created command queue with an
// empty residency container does not result in any makeResident call on the CSR
// (tracked through MockCommandStreamReceiver::makeResidentCalledTimes).
HWTEST_F(CommandQueueCreate, givenContainerWithAllocationsWhenResidencyContainerIsEmptyThenMakeResidentWasNotCalled) {
    auto csr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
    csr->setupContext(*neoDevice->getDefaultEngine().osContext);

    const ze_command_queue_desc_t desc = {};
    ze_result_t returnValue;
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
                                                           device,
                                                           csr.get(),
                                                           &desc,
                                                           false,
                                                           false,
                                                           returnValue));
    // Fail the test here rather than dereferencing a null queue below if creation failed.
    ASSERT_NE(nullptr, commandQueue);

    ResidencyContainer container;
    commandQueue->submitBatchBuffer(0, container, nullptr);
    EXPECT_EQ(csr->makeResidentCalledTimes, 0u);

    commandQueue->destroy();
}
TEST_F(CommandQueueCreate, whenCommandQueueCreatedThenExpectLinearStreamInitializedWithExpectedSize) {
const ze_command_queue_desc_t desc = {};
ze_result_t returnValue;
@@ -580,7 +598,19 @@ HWTEST_F(CommandQueueCommandsSingleTile, givenCommandQueueWhenExecutingCommandLi
}
HWTEST_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecutingCommandListsThenWorkPartitionAllocationIsMadeResident) {
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
// Test-local CSR mock that only records whether makeResident was invoked for one
// specific allocation (expectedGa). The override does not call the base class
// implementation.
class MyCsrMock : public MockCsrHw2<FamilyType> {
    using MockCsrHw2<FamilyType>::MockCsrHw2;

  public:
    void makeResident(GraphicsAllocation &graphicsAllocation) override {
        // Latch the flag: once the expected allocation has been seen it stays set.
        const bool isExpectedAllocation = (&graphicsAllocation == expectedGa);
        expectedGAWasMadeResident = expectedGAWasMadeResident || isExpectedAllocation;
    }

    // Allocation to watch for; set by the test before executing command lists.
    GraphicsAllocation *expectedGa = nullptr;
    // Becomes true after makeResident has been called with *expectedGa.
    bool expectedGAWasMadeResident = false;
};
MyCsrMock csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
csr.initializeTagAllocation();
csr.createWorkPartitionAllocation(*neoDevice);
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
@@ -597,12 +627,13 @@ HWTEST_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecutin
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, returnValue));
auto commandListHandle = commandList->toHandle();
auto workPartitionAllocation = csr.getWorkPartitionAllocation();
csr.expectedGa = workPartitionAllocation;
auto status = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(status, ZE_RESULT_SUCCESS);
auto workPartitionAllocation = csr.getWorkPartitionAllocation();
ASSERT_NE(nullptr, workPartitionAllocation);
EXPECT_TRUE(isAllocationInResidencyContainer(csr, workPartitionAllocation));
EXPECT_TRUE(csr.expectedGAWasMadeResident);
commandQueue->destroy();
}
@@ -877,12 +908,11 @@ class MockCommandQueue : public L0::CommandQueueHw<gfxCoreFamily> {
// Forwards device, csr and desc unchanged to the L0::CommandQueueHw<gfxCoreFamily> base constructor.
MockCommandQueue(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : L0::CommandQueueHw<gfxCoreFamily>(device, csr, desc) {}
using BaseClass = ::L0::CommandQueueHw<gfxCoreFamily>;
using BaseClass::csr;
using BaseClass::heapContainer;
using BaseClass::residencyContainer;
NEO::HeapContainer mockHeapContainer;
void handleScratchSpace(NEO::ResidencyContainer &residency,
NEO::HeapContainer &heapContainer,
void handleScratchSpace(NEO::HeapContainer &heapContainer,
NEO::ScratchSpaceController *scratchController,
bool &gsbaState, bool &frontEndState,
uint32_t perThreadScratchSpaceSize) override {
@@ -966,7 +996,7 @@ HWTEST2_F(ExecuteCommandListTests, givenExecuteCommandListWhenItReturnsThenConta
commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(0u, commandQueue->residencyContainer.size());
EXPECT_EQ(0u, commandQueue->csr->getResidencyAllocations().size());
EXPECT_EQ(0u, commandQueue->heapContainer.size());
commandQueue->destroy();

View File

@@ -552,7 +552,7 @@ struct MockScratchController : public ScratchSpaceController {
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,
NEO::ResidencyContainer &residency) override {
NEO::CommandStreamReceiver *csr) override {
}
// No-op override: this mock performs no heap reservation and leaves both arguments untouched.
void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) override {
}
};

View File

@@ -20,6 +20,7 @@ class InternalAllocationStorage;
class MemoryManager;
struct HardwareInfo;
class OsContext;
class CommandStreamReceiver;
namespace ScratchSpaceConstants {
constexpr size_t scratchSpaceOffsetFor64Bit = 4096u;
@@ -69,7 +70,7 @@ class ScratchSpaceController {
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,
ResidencyContainer &residency) = 0;
CommandStreamReceiver *csr) = 0;
protected:
MemoryManager *getMemoryManager() const;

View File

@@ -98,6 +98,6 @@ void ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(BindlessH
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,
ResidencyContainer &residency) {
NEO::CommandStreamReceiver *csr) {
}
} // namespace NEO

View File

@@ -42,7 +42,7 @@ class ScratchSpaceControllerBase : public ScratchSpaceController {
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,
ResidencyContainer &residency) override;
NEO::CommandStreamReceiver *csr) override;
protected:
void createScratchSpaceAllocation();

View File

@@ -111,6 +111,9 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
}
GraphicsAllocation *getClearColorAllocation() override { return nullptr; }
// Mock override: only counts how many times it was called; gfxAllocation is not used.
void makeResident(GraphicsAllocation &gfxAllocation) override {
    ++makeResidentCalledTimes;
}
void postInitFlagsSetup() override {}
@@ -118,6 +121,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
int *flushBatchedSubmissionsCallCounter = nullptr;
uint32_t waitForCompletionWithTimeoutCalled = 0;
uint32_t mockTagAddress = 0;
uint32_t makeResidentCalledTimes = 0;
bool multiOsContextCapable = false;
bool memoryCompressionEnabled = false;
bool downloadAllocationsCalled = false;

View File

@@ -37,8 +37,8 @@ class MockScratchSpaceControllerBase : public ScratchSpaceControllerBase {
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,
NEO::ResidencyContainer &residency) override {
ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(heapsHelper, requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty, residencyContainer);
NEO::CommandStreamReceiver *csr) override {
ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(heapsHelper, requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty, csr);
programBindlessSurfaceStateForScratchCalled = true;
}
ResidencyContainer residencyContainer;
@@ -79,7 +79,7 @@ HWTEST_F(ScratchComtrolerTests, givenCommandQueueWhenProgramHeapBindlessCalledTh
bool gsbaStateDirty = false;
bool frontEndStateDirty = false;
HeapContainer heapContainer;
scratchController->programBindlessSurfaceStateForScratch(nullptr, 0, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, scratchController->residencyContainer);
scratchController->programBindlessSurfaceStateForScratch(nullptr, 0, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
EXPECT_TRUE(static_cast<MockScratchSpaceControllerBase *>(scratchController.get())->programBindlessSurfaceStateForScratchCalled);
}