mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 22:12:59 +08:00
Fix makeNonResident for csr residency allocations
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
b363a4f765
commit
e1bcad51f0
@@ -62,7 +62,7 @@ void CommandQueueImp::submitBatchBuffer(size_t offset, NEO::ResidencyContainer &
|
||||
NEO::QueueThrottle::HIGH, NEO::QueueSliceCount::defaultSliceCount,
|
||||
commandStream->getUsed(), commandStream, endingCmdPtr, false);
|
||||
|
||||
csr->submitBatchBuffer(batchBuffer, residencyContainer);
|
||||
csr->submitBatchBuffer(batchBuffer, csr->getResidencyAllocations());
|
||||
buffers.setCurrentFlushStamp(csr->obtainCurrentFlushStamp());
|
||||
}
|
||||
|
||||
|
||||
@@ -46,8 +46,7 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
size_t estimatePipelineSelect();
|
||||
void programPipelineSelect(NEO::LinearStream &commandStream);
|
||||
|
||||
MOCKABLE_VIRTUAL void handleScratchSpace(NEO::ResidencyContainer &residency,
|
||||
NEO::HeapContainer &heapContainer,
|
||||
MOCKABLE_VIRTUAL void handleScratchSpace(NEO::HeapContainer &heapContainer,
|
||||
NEO::ScratchSpaceController *scratchController,
|
||||
bool &gsbaState, bool &frontEndState,
|
||||
uint32_t perThreadScratchSpaceSize);
|
||||
|
||||
@@ -183,13 +183,12 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
|
||||
spaceForResidency += residencyContainerSpaceForTagWrite;
|
||||
|
||||
residencyContainer.reserve(spaceForResidency);
|
||||
csr->getResidencyAllocations().reserve(spaceForResidency);
|
||||
|
||||
auto scratchSpaceController = csr->getScratchSpaceController();
|
||||
bool gsbaStateDirty = false;
|
||||
bool frontEndStateDirty = false;
|
||||
handleScratchSpace(residencyContainer,
|
||||
heapContainer,
|
||||
handleScratchSpace(heapContainer,
|
||||
scratchSpaceController,
|
||||
gsbaStateDirty, frontEndStateDirty,
|
||||
perThreadScratchSpaceSize);
|
||||
@@ -223,18 +222,18 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
|
||||
const auto globalFenceAllocation = csr->getGlobalFenceAllocation();
|
||||
if (globalFenceAllocation) {
|
||||
residencyContainer.push_back(globalFenceAllocation);
|
||||
csr->makeResident(*globalFenceAllocation);
|
||||
}
|
||||
const auto workPartitionAllocation = csr->getWorkPartitionAllocation();
|
||||
if (workPartitionAllocation) {
|
||||
residencyContainer.push_back(workPartitionAllocation);
|
||||
csr->makeResident(*workPartitionAllocation);
|
||||
}
|
||||
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
NEO::SWTagsManager *tagsManager = neoDevice->getRootDeviceEnvironment().tagsManager.get();
|
||||
UNRECOVERABLE_IF(tagsManager == nullptr);
|
||||
residencyContainer.push_back(tagsManager->getBXMLHeapAllocation());
|
||||
residencyContainer.push_back(tagsManager->getSWTagHeapAllocation());
|
||||
csr->makeResident(*tagsManager->getBXMLHeapAllocation());
|
||||
csr->makeResident(*tagsManager->getSWTagHeapAllocation());
|
||||
tagsManager->insertBXMLHeapAddress<GfxFamily>(child);
|
||||
tagsManager->insertSWTagHeapAddress<GfxFamily>(child);
|
||||
}
|
||||
@@ -242,7 +241,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
csr->programHardwareContext(child);
|
||||
|
||||
if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger()) {
|
||||
residencyContainer.push_back(device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId()));
|
||||
csr->makeResident(*device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId()));
|
||||
}
|
||||
|
||||
if (!isCopyOnlyCommandQueue) {
|
||||
@@ -281,17 +280,17 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
(neoDevice->getDebugger() != nullptr && NEO::Debugger::isDebugEnabled(internalUsage));
|
||||
|
||||
if (devicePreemption == NEO::PreemptionMode::MidThread) {
|
||||
residencyContainer.push_back(csr->getPreemptionAllocation());
|
||||
csr->makeResident(*csr->getPreemptionAllocation());
|
||||
}
|
||||
|
||||
if (sipKernelUsed) {
|
||||
auto sipIsa = NEO::SipKernel::getSipKernel(*neoDevice).getSipAllocation();
|
||||
residencyContainer.push_back(sipIsa);
|
||||
csr->makeResident(*sipIsa);
|
||||
}
|
||||
|
||||
if (NEO::Debugger::isDebugEnabled(internalUsage) && neoDevice->getDebugger()) {
|
||||
UNRECOVERABLE_IF(device->getDebugSurface() == nullptr);
|
||||
residencyContainer.push_back(device->getDebugSurface());
|
||||
csr->makeResident(*device->getDebugSurface());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -355,9 +354,9 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
commandList->getPrintfFunctionContainer().end());
|
||||
|
||||
for (auto alloc : commandList->commandContainer.getResidencyContainer()) {
|
||||
if (residencyContainer.end() ==
|
||||
std::find(residencyContainer.begin(), residencyContainer.end(), alloc)) {
|
||||
residencyContainer.push_back(alloc);
|
||||
if (csr->getResidencyAllocations().end() ==
|
||||
std::find(csr->getResidencyAllocations().begin(), csr->getResidencyAllocations().end(), alloc)) {
|
||||
csr->makeResident(*alloc);
|
||||
|
||||
if (performMigration) {
|
||||
if (alloc &&
|
||||
@@ -385,7 +384,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
commandQueuePreemptionMode = statePreemption;
|
||||
|
||||
if (hFence) {
|
||||
residencyContainer.push_back(&fence->getAllocation());
|
||||
csr->makeResident(fence->getAllocation());
|
||||
if (isCopyOnlyCommandQueue) {
|
||||
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(child, fence->getGpuAddress(), Fence::STATE_SIGNALED, false, true);
|
||||
} else {
|
||||
@@ -401,7 +400,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
|
||||
dispatchTaskCountWrite(child, true);
|
||||
|
||||
residencyContainer.push_back(csr->getTagAllocation());
|
||||
csr->makeResident(*csr->getTagAllocation());
|
||||
void *endingCmd = nullptr;
|
||||
if (directSubmissionEnabled) {
|
||||
endingCmd = child.getSpace(0);
|
||||
@@ -417,17 +416,16 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
memset(paddingPtr, 0, padding);
|
||||
}
|
||||
|
||||
submitBatchBuffer(ptrDiff(child.getCpuBase(), commandStream->getCpuBase()), residencyContainer, endingCmd);
|
||||
submitBatchBuffer(ptrDiff(child.getCpuBase(), commandStream->getCpuBase()), csr->getResidencyAllocations(), endingCmd);
|
||||
|
||||
this->taskCount = csr->peekTaskCount();
|
||||
|
||||
csr->makeSurfacePackNonResident(residencyContainer);
|
||||
csr->makeSurfacePackNonResident(csr->getResidencyAllocations());
|
||||
|
||||
if (getSynchronousMode() == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS) {
|
||||
this->synchronize(std::numeric_limits<uint64_t>::max());
|
||||
}
|
||||
|
||||
this->residencyContainer.clear();
|
||||
this->heapContainer.clear();
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
|
||||
@@ -104,8 +104,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateStateBaseAddressCmdSize() {
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::ResidencyContainer &residency,
|
||||
NEO::HeapContainer &heapContainer,
|
||||
void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &heapContainer,
|
||||
NEO::ScratchSpaceController *scratchController,
|
||||
bool &gsbaState, bool &frontEndState,
|
||||
uint32_t perThreadScratchSpaceSize) {
|
||||
@@ -114,7 +113,7 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::ResidencyContainer &
|
||||
scratchController->setRequiredScratchSpace(nullptr, 0u, perThreadScratchSpaceSize, 0u, csr->peekTaskCount(),
|
||||
csr->getOsContext(), gsbaState, frontEndState);
|
||||
auto scratchAllocation = scratchController->getScratchSpaceAllocation();
|
||||
residency.push_back(scratchAllocation);
|
||||
csr->makeResident(*scratchAllocation);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -96,7 +96,6 @@ struct CommandQueueImp : public CommandQueue {
|
||||
std::vector<Kernel *> printfFunctionContainer;
|
||||
bool gpgpuEnabled = false;
|
||||
CommandBufferManager buffers;
|
||||
NEO::ResidencyContainer residencyContainer;
|
||||
NEO::HeapContainer heapContainer;
|
||||
};
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
|
||||
using BaseClass::device;
|
||||
using BaseClass::preemptionCmdSyncProgramming;
|
||||
using BaseClass::printfFunctionContainer;
|
||||
using BaseClass::submitBatchBuffer;
|
||||
using BaseClass::synchronizeByPollingForTaskCount;
|
||||
using CommandQueue::commandQueuePreemptionMode;
|
||||
using CommandQueue::internalUsage;
|
||||
@@ -85,6 +86,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
|
||||
using BaseClass::printfFunctionContainer;
|
||||
using L0::CommandQueue::internalUsage;
|
||||
using L0::CommandQueue::preemptionCmdSyncProgramming;
|
||||
using L0::CommandQueueImp::csr;
|
||||
|
||||
MockCommandQueueHw(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : L0::CommandQueueHw<gfxCoreFamily>(device, csr, desc) {
|
||||
}
|
||||
|
||||
@@ -217,6 +217,24 @@ HWTEST_F(CommandQueueCreate, given100CmdListsWhenExecutingThenCommandStreamIsNot
|
||||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
HWTEST_F(CommandQueueCreate, givenContainerWithAllocationsWhenResidencyContainerIsEmptyThenMakeResidentWasNotCalled) {
|
||||
auto csr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
|
||||
csr->setupContext(*neoDevice->getDefaultEngine().osContext);
|
||||
const ze_command_queue_desc_t desc = {};
|
||||
ze_result_t returnValue;
|
||||
auto commandQueue = whitebox_cast(CommandQueue::create(productFamily,
|
||||
device,
|
||||
csr.get(),
|
||||
&desc,
|
||||
false,
|
||||
false,
|
||||
returnValue));
|
||||
ResidencyContainer container;
|
||||
commandQueue->submitBatchBuffer(0, container, nullptr);
|
||||
EXPECT_EQ(csr->makeResidentCalledTimes, 0u);
|
||||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
TEST_F(CommandQueueCreate, whenCommandQueueCreatedThenExpectLinearStreamInitializedWithExpectedSize) {
|
||||
const ze_command_queue_desc_t desc = {};
|
||||
ze_result_t returnValue;
|
||||
@@ -580,7 +598,19 @@ HWTEST_F(CommandQueueCommandsSingleTile, givenCommandQueueWhenExecutingCommandLi
|
||||
}
|
||||
|
||||
HWTEST_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecutingCommandListsThenWorkPartitionAllocationIsMadeResident) {
|
||||
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
|
||||
class MyCsrMock : public MockCsrHw2<FamilyType> {
|
||||
using MockCsrHw2<FamilyType>::MockCsrHw2;
|
||||
|
||||
public:
|
||||
void makeResident(GraphicsAllocation &graphicsAllocation) override {
|
||||
if (expectedGa == &graphicsAllocation) {
|
||||
expectedGAWasMadeResident = true;
|
||||
}
|
||||
}
|
||||
GraphicsAllocation *expectedGa = nullptr;
|
||||
bool expectedGAWasMadeResident = false;
|
||||
};
|
||||
MyCsrMock csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
|
||||
csr.initializeTagAllocation();
|
||||
csr.createWorkPartitionAllocation(*neoDevice);
|
||||
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
|
||||
@@ -597,12 +627,13 @@ HWTEST_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecutin
|
||||
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, returnValue));
|
||||
auto commandListHandle = commandList->toHandle();
|
||||
auto workPartitionAllocation = csr.getWorkPartitionAllocation();
|
||||
csr.expectedGa = workPartitionAllocation;
|
||||
auto status = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||
EXPECT_EQ(status, ZE_RESULT_SUCCESS);
|
||||
|
||||
auto workPartitionAllocation = csr.getWorkPartitionAllocation();
|
||||
ASSERT_NE(nullptr, workPartitionAllocation);
|
||||
EXPECT_TRUE(isAllocationInResidencyContainer(csr, workPartitionAllocation));
|
||||
EXPECT_TRUE(csr.expectedGAWasMadeResident);
|
||||
|
||||
commandQueue->destroy();
|
||||
}
|
||||
@@ -877,12 +908,11 @@ class MockCommandQueue : public L0::CommandQueueHw<gfxCoreFamily> {
|
||||
MockCommandQueue(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : L0::CommandQueueHw<gfxCoreFamily>(device, csr, desc) {}
|
||||
using BaseClass = ::L0::CommandQueueHw<gfxCoreFamily>;
|
||||
|
||||
using BaseClass::csr;
|
||||
using BaseClass::heapContainer;
|
||||
using BaseClass::residencyContainer;
|
||||
|
||||
NEO::HeapContainer mockHeapContainer;
|
||||
void handleScratchSpace(NEO::ResidencyContainer &residency,
|
||||
NEO::HeapContainer &heapContainer,
|
||||
void handleScratchSpace(NEO::HeapContainer &heapContainer,
|
||||
NEO::ScratchSpaceController *scratchController,
|
||||
bool &gsbaState, bool &frontEndState,
|
||||
uint32_t perThreadScratchSpaceSize) override {
|
||||
@@ -966,7 +996,7 @@ HWTEST2_F(ExecuteCommandListTests, givenExecuteCommandListWhenItReturnsThenConta
|
||||
|
||||
commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||
|
||||
EXPECT_EQ(0u, commandQueue->residencyContainer.size());
|
||||
EXPECT_EQ(0u, commandQueue->csr->getResidencyAllocations().size());
|
||||
EXPECT_EQ(0u, commandQueue->heapContainer.size());
|
||||
|
||||
commandQueue->destroy();
|
||||
|
||||
@@ -552,7 +552,7 @@ struct MockScratchController : public ScratchSpaceController {
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
bool &vfeStateDirty,
|
||||
NEO::ResidencyContainer &residency) override {
|
||||
NEO::CommandStreamReceiver *csr) override {
|
||||
}
|
||||
void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) override{};
|
||||
};
|
||||
|
||||
@@ -20,6 +20,7 @@ class InternalAllocationStorage;
|
||||
class MemoryManager;
|
||||
struct HardwareInfo;
|
||||
class OsContext;
|
||||
class CommandStreamReceiver;
|
||||
|
||||
namespace ScratchSpaceConstants {
|
||||
constexpr size_t scratchSpaceOffsetFor64Bit = 4096u;
|
||||
@@ -69,7 +70,7 @@ class ScratchSpaceController {
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
bool &vfeStateDirty,
|
||||
ResidencyContainer &residency) = 0;
|
||||
CommandStreamReceiver *csr) = 0;
|
||||
|
||||
protected:
|
||||
MemoryManager *getMemoryManager() const;
|
||||
|
||||
@@ -98,6 +98,6 @@ void ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(BindlessH
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
bool &vfeStateDirty,
|
||||
ResidencyContainer &residency) {
|
||||
NEO::CommandStreamReceiver *csr) {
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -42,7 +42,7 @@ class ScratchSpaceControllerBase : public ScratchSpaceController {
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
bool &vfeStateDirty,
|
||||
ResidencyContainer &residency) override;
|
||||
NEO::CommandStreamReceiver *csr) override;
|
||||
|
||||
protected:
|
||||
void createScratchSpaceAllocation();
|
||||
|
||||
@@ -111,6 +111,9 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
||||
}
|
||||
|
||||
GraphicsAllocation *getClearColorAllocation() override { return nullptr; }
|
||||
void makeResident(GraphicsAllocation &gfxAllocation) override {
|
||||
makeResidentCalledTimes++;
|
||||
}
|
||||
|
||||
void postInitFlagsSetup() override {}
|
||||
|
||||
@@ -118,6 +121,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
||||
int *flushBatchedSubmissionsCallCounter = nullptr;
|
||||
uint32_t waitForCompletionWithTimeoutCalled = 0;
|
||||
uint32_t mockTagAddress = 0;
|
||||
uint32_t makeResidentCalledTimes = 0;
|
||||
bool multiOsContextCapable = false;
|
||||
bool memoryCompressionEnabled = false;
|
||||
bool downloadAllocationsCalled = false;
|
||||
|
||||
@@ -37,8 +37,8 @@ class MockScratchSpaceControllerBase : public ScratchSpaceControllerBase {
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
bool &vfeStateDirty,
|
||||
NEO::ResidencyContainer &residency) override {
|
||||
ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(heapsHelper, requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty, residencyContainer);
|
||||
NEO::CommandStreamReceiver *csr) override {
|
||||
ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(heapsHelper, requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty, csr);
|
||||
programBindlessSurfaceStateForScratchCalled = true;
|
||||
}
|
||||
ResidencyContainer residencyContainer;
|
||||
@@ -79,7 +79,7 @@ HWTEST_F(ScratchComtrolerTests, givenCommandQueueWhenProgramHeapBindlessCalledTh
|
||||
bool gsbaStateDirty = false;
|
||||
bool frontEndStateDirty = false;
|
||||
HeapContainer heapContainer;
|
||||
scratchController->programBindlessSurfaceStateForScratch(nullptr, 0, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, scratchController->residencyContainer);
|
||||
scratchController->programBindlessSurfaceStateForScratch(nullptr, 0, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
|
||||
|
||||
EXPECT_TRUE(static_cast<MockScratchSpaceControllerBase *>(scratchController.get())->programBindlessSurfaceStateForScratchCalled);
|
||||
}
|
||||
Reference in New Issue
Block a user