Fix root device residency for TagNode

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2021-05-14 16:46:50 +00:00
committed by Compute-Runtime-Automation
parent 8e691711e9
commit 9c5dee54dc
12 changed files with 77 additions and 26 deletions

View File

@ -1143,7 +1143,7 @@ HWTEST_F(EventTest, WhenGetHwTimeStampsAllocationThenValidPointerIsReturned) {
std::unique_ptr<Event> event(new Event(myCmdQ.get(), CL_COMMAND_COPY_BUFFER, 0, 0));
ASSERT_NE(nullptr, event);
GraphicsAllocation *allocation = event->getHwTimeStampNode()->getBaseGraphicsAllocation();
GraphicsAllocation *allocation = event->getHwTimeStampNode()->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
ASSERT_NE(nullptr, allocation);
void *memoryStorage = allocation->getUnderlyingBuffer();
@ -1164,7 +1164,7 @@ HWTEST_F(EventTest, WhenEventIsCreatedThenHwTimeStampsMemoryIsPlacedInGraphicsAl
HwTimeStamps *timeStamps = static_cast<TagNode<HwTimeStamps> *>(event->getHwTimeStampNode())->tagForCpuAccess;
ASSERT_NE(nullptr, timeStamps);
GraphicsAllocation *allocation = event->getHwTimeStampNode()->getBaseGraphicsAllocation();
GraphicsAllocation *allocation = event->getHwTimeStampNode()->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
ASSERT_NE(nullptr, allocation);
void *memoryStorage = allocation->getUnderlyingBuffer();

View File

@ -1403,7 +1403,7 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingNonBlockedT
auto secondNode = cmdQ->timestampPacketContainer->peekNodes().at(0);
EXPECT_NE(firstNode->getBaseGraphicsAllocation(), secondNode->getBaseGraphicsAllocation());
EXPECT_TRUE(csr.isMadeResident(firstNode->getBaseGraphicsAllocation(), csr.taskCount));
EXPECT_TRUE(csr.isMadeResident(firstNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), csr.taskCount));
}
HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingBlockedThenMakeItResident) {
@ -1427,9 +1427,9 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingBlockedThen
auto secondNode = cmdQ->timestampPacketContainer->peekNodes().at(0);
EXPECT_NE(firstNode->getBaseGraphicsAllocation(), secondNode->getBaseGraphicsAllocation());
EXPECT_FALSE(csr.isMadeResident(firstNode->getBaseGraphicsAllocation(), csr.taskCount));
EXPECT_FALSE(csr.isMadeResident(firstNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), csr.taskCount));
userEvent.setStatus(CL_COMPLETE);
EXPECT_TRUE(csr.isMadeResident(firstNode->getBaseGraphicsAllocation(), csr.taskCount));
EXPECT_TRUE(csr.isMadeResident(firstNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), csr.taskCount));
cmdQ->isQueueBlocked();
}
@ -1591,8 +1591,8 @@ HWTEST_F(TimestampPacketTests, givenEventsWaitlistFromDifferentDevicesWhenEnqueu
cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 2, waitlist, nullptr);
EXPECT_NE(tagNode1->getBaseGraphicsAllocation(), tagNode2->getBaseGraphicsAllocation());
EXPECT_TRUE(ultCsr.isMadeResident(tagNode1->getBaseGraphicsAllocation(), ultCsr.taskCount));
EXPECT_TRUE(ultCsr.isMadeResident(tagNode2->getBaseGraphicsAllocation(), ultCsr.taskCount));
EXPECT_TRUE(ultCsr.isMadeResident(tagNode1->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), ultCsr.taskCount));
EXPECT_TRUE(ultCsr.isMadeResident(tagNode2->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), ultCsr.taskCount));
}
HWTEST_F(TimestampPacketTests, givenEventsWaitlistFromDifferentCSRsWhenEnqueueingThenMakeAllTimestampsResident) {
@ -1628,8 +1628,8 @@ HWTEST_F(TimestampPacketTests, givenEventsWaitlistFromDifferentCSRsWhenEnqueuein
cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 2, waitlist, nullptr);
EXPECT_NE(tagNode1->getBaseGraphicsAllocation(), tagNode2->getBaseGraphicsAllocation());
EXPECT_TRUE(ultCsr.isMadeResident(tagNode1->getBaseGraphicsAllocation(), ultCsr.taskCount));
EXPECT_TRUE(ultCsr.isMadeResident(tagNode2->getBaseGraphicsAllocation(), ultCsr.taskCount));
EXPECT_TRUE(ultCsr.isMadeResident(tagNode1->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), ultCsr.taskCount));
EXPECT_TRUE(ultCsr.isMadeResident(tagNode2->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), ultCsr.taskCount));
}
HWTEST_F(TimestampPacketTests, givenTimestampPacketWhenEnqueueingNonBlockedThenMakeItResident) {
@ -1643,7 +1643,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWhenEnqueueingNonBlockedThenM
cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
auto timestampPacketNode = cmdQ.timestampPacketContainer->peekNodes().at(0);
EXPECT_TRUE(csr.isMadeResident(timestampPacketNode->getBaseGraphicsAllocation(), csr.taskCount));
EXPECT_TRUE(csr.isMadeResident(timestampPacketNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), csr.taskCount));
}
HWTEST_F(TimestampPacketTests, givenTimestampPacketWhenEnqueueingBlockedThenMakeItResidentOnSubmit) {
@ -1662,9 +1662,9 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWhenEnqueueingBlockedThenMake
cmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 1, &clEvent, nullptr);
auto timestampPacketNode = cmdQ->timestampPacketContainer->peekNodes().at(0);
EXPECT_FALSE(csr.isMadeResident(timestampPacketNode->getBaseGraphicsAllocation(), csr.taskCount));
EXPECT_FALSE(csr.isMadeResident(timestampPacketNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), csr.taskCount));
userEvent.setStatus(CL_COMPLETE);
EXPECT_TRUE(csr.isMadeResident(timestampPacketNode->getBaseGraphicsAllocation(), csr.taskCount));
EXPECT_TRUE(csr.isMadeResident(timestampPacketNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), csr.taskCount));
cmdQ->isQueueBlocked();
}

View File

@ -42,6 +42,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::isBlitterDirectSubmissionEnabled;
using BaseClass::isDirectSubmissionEnabled;
using BaseClass::isPerDssBackedBufferSent;
using BaseClass::makeResident;
using BaseClass::perDssBackedBuffer;
using BaseClass::programEnginePrologue;
using BaseClass::programPerDssBackedBuffer;

View File

@ -459,15 +459,15 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockedBlitEnqueueWhenUnblockingThenMake
auto outputDependency = mockCmdQ->timestampPacketContainer->peekNodes()[0];
EXPECT_NE(outputDependency, dependencyFromPreviousEnqueue);
EXPECT_FALSE(bcsCsr->isMadeResident(dependencyFromPreviousEnqueue->getBaseGraphicsAllocation()));
EXPECT_FALSE(bcsCsr->isMadeResident(outputDependency->getBaseGraphicsAllocation()));
EXPECT_FALSE(bcsCsr->isMadeResident(eventDependency->getBaseGraphicsAllocation()));
EXPECT_FALSE(bcsCsr->isMadeResident(dependencyFromPreviousEnqueue->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation()));
EXPECT_FALSE(bcsCsr->isMadeResident(outputDependency->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation()));
EXPECT_FALSE(bcsCsr->isMadeResident(eventDependency->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation()));
userEvent->setStatus(CL_COMPLETE);
EXPECT_TRUE(bcsCsr->isMadeResident(dependencyFromPreviousEnqueue->getBaseGraphicsAllocation(), bcsCsr->taskCount));
EXPECT_TRUE(bcsCsr->isMadeResident(outputDependency->getBaseGraphicsAllocation(), bcsCsr->taskCount));
EXPECT_TRUE(bcsCsr->isMadeResident(eventDependency->getBaseGraphicsAllocation(), bcsCsr->taskCount));
EXPECT_TRUE(bcsCsr->isMadeResident(dependencyFromPreviousEnqueue->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), bcsCsr->taskCount));
EXPECT_TRUE(bcsCsr->isMadeResident(outputDependency->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), bcsCsr->taskCount));
EXPECT_TRUE(bcsCsr->isMadeResident(eventDependency->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(), bcsCsr->taskCount));
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenMapAllocationWhenEnqueueingReadOrWriteBufferThenStoreMapAllocationInDispatchParameters) {
@ -874,8 +874,8 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenInputAndOutputTimestampPacketWhenBlitCal
EXPECT_NE(outputTimestampPacketAllocation, inputTimestampPacketAllocation);
EXPECT_EQ(cmdQ->taskCount, inputTimestampPacketAllocation->getTaskCount(bcsCsr->getOsContext().getContextId()));
EXPECT_EQ(cmdQ->taskCount, outputTimestampPacketAllocation->getTaskCount(bcsCsr->getOsContext().getContextId()));
EXPECT_EQ(cmdQ->taskCount, inputTimestampPacketAllocation->getDefaultGraphicsAllocation()->getTaskCount(bcsCsr->getOsContext().getContextId()));
EXPECT_EQ(cmdQ->taskCount, outputTimestampPacketAllocation->getDefaultGraphicsAllocation()->getTaskCount(bcsCsr->getOsContext().getContextId()));
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferWhenUsingBcsThenCallWait) {

View File

@ -611,7 +611,7 @@ TEST_F(PerformanceCountersMetricsLibraryTest, WhenGettingHwPerfCounterAllocation
std::unique_ptr<Event> event(new Event(queue.get(), CL_COMMAND_COPY_BUFFER, 0, 0));
ASSERT_NE(nullptr, event);
GraphicsAllocation *allocation = event->getHwPerfCounterNode()->getBaseGraphicsAllocation();
GraphicsAllocation *allocation = event->getHwPerfCounterNode()->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
ASSERT_NE(nullptr, allocation);
void *memoryStorage = allocation->getUnderlyingBuffer();
@ -637,7 +637,7 @@ TEST_F(PerformanceCountersMetricsLibraryTest, WhenCreatingEventThenHwPerfCounter
HwPerfCounter *perfCounter = static_cast<TagNode<HwPerfCounter> *>(event->getHwPerfCounterNode())->tagForCpuAccess;
ASSERT_NE(nullptr, perfCounter);
GraphicsAllocation *allocation = event->getHwPerfCounterNode()->getBaseGraphicsAllocation();
GraphicsAllocation *allocation = event->getHwPerfCounterNode()->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
ASSERT_NE(nullptr, allocation);
void *memoryStorage = allocation->getUnderlyingBuffer();

View File

@ -1933,7 +1933,7 @@ TEST(WddmMemoryManagerCleanupTest, givenUsedTagAllocationInWddmMemoryManagerWhen
auto tagAllocator = csr->getEventPerfCountAllocator(100);
auto allocation = tagAllocator->getTag()->getBaseGraphicsAllocation();
allocation->updateTaskCount(1, csr->getOsContext().getContextId());
allocation->getDefaultGraphicsAllocation()->updateTaskCount(1, csr->getOsContext().getContextId());
csr.reset();
EXPECT_NO_THROW(executionEnvironment.memoryManager.reset());
}

View File

@ -12,6 +12,7 @@
#include "shared/test/common/mocks/ult_device_factory.h"
#include "opencl/test/unit_test/fixtures/memory_allocator_fixture.h"
#include "opencl/test/unit_test/libult/ult_command_stream_receiver.h"
#include "test.h"
#include "gtest/gtest.h"
@ -521,6 +522,46 @@ TEST_F(TagAllocatorTest, givenMultipleRootDevicesWhenPopulatingTagsThenCreateMul
}
}
// Checks that CommandStreamReceiver::makeResident(MultiGraphicsAllocation &) marks as resident
// only the allocation matching the root device index the receiver was created with.
HWTEST_F(TagAllocatorTest, givenMultipleRootDevicesWhenCallingMakeResidentThenUseCorrectRootDeviceIndex) {
    using UltCsr = UltCommandStreamReceiver<FamilyType>;
    constexpr uint32_t maxRootDeviceIndex = 1;

    // Two root device environments (indices 0 and 1), each given the default hardware info.
    auto executionEnvironment = std::make_unique<NEO::ExecutionEnvironment>();
    executionEnvironment->prepareRootDeviceEnvironments(maxRootDeviceIndex + 1);
    for (auto &rootDeviceEnvironment : executionEnvironment->rootDeviceEnvironments) {
        rootDeviceEnvironment->setHwInfo(defaultHwInfo.get());
    }

    // The execution environment takes over the raw pointer; the test keeps using it as a non-owning handle.
    auto mockManager = new MockMemoryManager(false, false, *executionEnvironment);
    executionEnvironment->memoryManager.reset(mockManager);

    const std::vector<uint32_t> rootDeviceIndices = {0, 1};
    MockTagAllocator<TimestampPackets<uint32_t>> tagAllocator(rootDeviceIndices, mockManager, 1, 1, sizeof(TimestampPackets<uint32_t>), false, mockDeviceBitfield);

    EXPECT_EQ(1u, tagAllocator.getGraphicsAllocationsCount());

    auto multiAllocation = tagAllocator.gfxAllocations[0].get();

    // Builds a receiver for the given root device and attaches a freshly registered OS context to it.
    auto createCsrWithContext = [&](uint32_t rootDeviceIndex) {
        auto csr = std::unique_ptr<UltCsr>(static_cast<UltCsr *>(createCommandStream(*executionEnvironment, rootDeviceIndex, 1)));
        auto osContext = mockManager->createAndRegisterOsContext(csr.get(), {aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular}, 1, PreemptionMode::Disabled, true);
        csr->setupContext(*osContext);
        return csr;
    };

    auto csrForRoot0 = createCsrWithContext(0);
    auto csrForRoot1 = createCsrWithContext(1);

    // Record every makeResident call so isMadeResident can be queried afterwards.
    csrForRoot0->storeMakeResidentAllocations = true;
    csrForRoot1->storeMakeResidentAllocations = true;

    csrForRoot0->makeResident(*multiAllocation);
    EXPECT_TRUE(csrForRoot0->isMadeResident(multiAllocation->getGraphicsAllocation(0)));
    EXPECT_FALSE(csrForRoot0->isMadeResident(multiAllocation->getGraphicsAllocation(1)));

    csrForRoot1->makeResident(*multiAllocation);
    EXPECT_FALSE(csrForRoot1->isMadeResident(multiAllocation->getGraphicsAllocation(0)));
    EXPECT_TRUE(csrForRoot1->isMadeResident(multiAllocation->getGraphicsAllocation(1)));
}
TEST_F(TagAllocatorTest, givenNotSupportedTagTypeWhenCallingMethodThenAbortOrReturnInitialValue) {
{

View File

@ -92,6 +92,10 @@ bool CommandStreamReceiver::submitBatchBuffer(BatchBuffer &batchBuffer, Residenc
return ret;
}
// Makes resident the single allocation, out of a multi-root-device set, that belongs to
// this receiver's root device, by forwarding to the GraphicsAllocation overload.
void CommandStreamReceiver::makeResident(MultiGraphicsAllocation &gfxAllocation) {
    // NOTE(review): the lookup result is dereferenced without a null check; presumably callers
    // guarantee an allocation exists for this rootDeviceIndex - confirm.
    auto *allocationForThisRootDevice = gfxAllocation.getGraphicsAllocation(rootDeviceIndex);
    makeResident(*allocationForThisRootDevice);
}
void CommandStreamReceiver::makeResident(GraphicsAllocation &gfxAllocation) {
auto submissionTaskCount = this->taskCount + 1;
if (gfxAllocation.isResidencyTaskCountBelow(submissionTaskCount, osContext->getContextId())) {

View File

@ -87,6 +87,7 @@ class CommandStreamReceiver {
virtual void programHardwareContext(LinearStream &cmdStream) = 0;
virtual size_t getCmdsSizeForHardwareContext() const = 0;
void makeResident(MultiGraphicsAllocation &gfxAllocation);
MOCKABLE_VIRTUAL void makeResident(GraphicsAllocation &gfxAllocation);
virtual void makeNonResident(GraphicsAllocation &gfxAllocation);
MOCKABLE_VIRTUAL void makeSurfacePackNonResident(ResidencyContainer &allocationsForResidency);

View File

@ -31,6 +31,10 @@ void TagAllocatorBase::cleanUpResources() {
gfxAllocations.clear();
}
// Returns the MultiGraphicsAllocation pointer stored in this node's gfxAllocation member.
// The pointer is handed out as-is; no ownership is transferred by this call.
MultiGraphicsAllocation *TagNodeBase::getBaseGraphicsAllocation() const {
    return this->gfxAllocation;
}
// Hands this node back to the allocator it is associated with.
void TagNodeBase::returnTag() {
    // NOTE(review): allocator is used without a null check; presumably it is always set
    // before a node is handed out - confirm.
    auto *owningAllocator = this->allocator;
    owningAllocator->returnTag(this);
}

View File

@ -32,7 +32,7 @@ class TagNodeBase : public NonCopyableOrMovableClass {
public:
virtual ~TagNodeBase() = default;
GraphicsAllocation *getBaseGraphicsAllocation() const { return gfxAllocation; }
MultiGraphicsAllocation *getBaseGraphicsAllocation() const;
uint64_t getGpuAddress() const { return gpuAddress; }
@ -92,7 +92,7 @@ class TagNodeBase : public NonCopyableOrMovableClass {
TagAllocatorBase *allocator = nullptr;
GraphicsAllocation *gfxAllocation = nullptr;
MultiGraphicsAllocation *gfxAllocation = nullptr;
uint64_t gpuAddress = 0;
std::atomic<uint32_t> refCount{0};
std::atomic<uint32_t> implicitCpuDependenciesCount{0};

View File

@ -106,7 +106,7 @@ void TagAllocator<TagType>::populateFreeTags() {
auto tagOffset = i * tagSize;
nodesMemory[i].allocator = this;
nodesMemory[i].gfxAllocation = multiGraphicsAllocation->getDefaultGraphicsAllocation();
nodesMemory[i].gfxAllocation = multiGraphicsAllocation;
nodesMemory[i].tagForCpuAccess = reinterpret_cast<TagType *>(ptrOffset(baseCpuAddress, tagOffset));
nodesMemory[i].gpuAddress = baseGpuAddress + tagOffset;
nodesMemory[i].setDoNotReleaseNodes(doNotReleaseNodes);