Fix multi thread usage of external host alloc

This is a fixed re-upload of this commit after it was automatically reverted.
With this commit, OpenCL tracks whether external host memory is used from
multiple threads and ensures that the task count is updated in all of those
threads before destroying the allocation.

Resolves: NEO-6807

Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2022-05-05 12:01:59 +00:00
committed by Compute-Runtime-Automation
parent 2a0c395db5
commit 0a16dc6c47
12 changed files with 183 additions and 8 deletions

View File

@ -297,3 +297,25 @@ HWTEST_F(InternalAllocationStorageTest, givenMultipleActivePartitionsWhenDetachi
memoryManager->freeGraphicsMemory(allocationReusable.release());
}
// A temporary allocation whose task count has been reached and which has no pending
// host-pointer task count assignment must be released by cleanAllocationList().
TEST_F(InternalAllocationStorageTest, givenInternalAllocationWhenTaskCountMetsExpectationAndItHasBeenAssignedThenAllocIsRemoved) {
    const uint32_t reachedTaskCount = 10u;
    auto *tempAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});

    // The tag value and the allocation's task count are set to the same value.
    *csr->getTagAddress() = reachedTaskCount;
    const auto contextId = csr->getOsContext().getContextId();
    tempAllocation->updateTaskCount(reachedTaskCount, contextId);

    // No assignment is pending, so nothing blocks the release.
    tempAllocation->hostPtrTaskCountAssignment = 0;

    std::unique_ptr<GraphicsAllocation> owned(tempAllocation);
    storage->storeAllocation(std::move(owned), TEMPORARY_ALLOCATION);
    storage->cleanAllocationList(reachedTaskCount, TEMPORARY_ALLOCATION);

    EXPECT_TRUE(csr->getTemporaryAllocations().peekIsEmpty());
}
// A temporary allocation with a pending host-pointer task count assignment must stay
// on the list even though its task count has already been reached.
TEST_F(InternalAllocationStorageTest, givenInternalAllocationWhenTaskCountMetsExpectationAndItHasNotBeenAssignedThenAllocIsNotRemoved) {
    const uint32_t reachedTaskCount = 10u;
    auto *tempAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{csr->getRootDeviceIndex(), MemoryConstants::pageSize});

    *csr->getTagAddress() = reachedTaskCount;
    const auto contextId = csr->getOsContext().getContextId();
    tempAllocation->updateTaskCount(reachedTaskCount, contextId);

    // One assignment is still outstanding.
    tempAllocation->hostPtrTaskCountAssignment = 1;

    std::unique_ptr<GraphicsAllocation> owned(tempAllocation);
    storage->storeAllocation(std::move(owned), TEMPORARY_ALLOCATION);
    storage->cleanAllocationList(reachedTaskCount, TEMPORARY_ALLOCATION);

    EXPECT_FALSE(csr->getTemporaryAllocations().peekIsEmpty());

    // Clear the pending assignment again; presumably this lets the storage free the
    // allocation later (e.g. during fixture teardown).
    tempAllocation->hostPtrTaskCountAssignment = 0;
}

View File

@ -738,7 +738,8 @@ bool CommandStreamReceiver::createAllocationForHostSurface(HostPtrSurface &surfa
if (allocation == nullptr) {
return false;
}
allocation->updateTaskCount(CompletionStamp::notReady, osContext->getContextId());
allocation->hostPtrTaskCountAssignment++;
allocation->updateTaskCount(0u, osContext->getContextId());
surface.setAllocation(allocation.get());
internalAllocationStorage->storeAllocation(std::move(allocation), TEMPORARY_ALLOCATION);
return true;

View File

@ -1076,7 +1076,8 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesCo
}
blitProperties.csrDependencies.makeResident(*this);
blitProperties.srcAllocation->prepareHostPtrForResidency(this);
blitProperties.dstAllocation->prepareHostPtrForResidency(this);
makeResident(*blitProperties.srcAllocation);
makeResident(*blitProperties.dstAllocation);
if (blitProperties.clearColorAllocation) {

View File

@ -7,6 +7,7 @@
#include "graphics_allocation.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/aligned_memory.h"
@ -102,6 +103,17 @@ bool GraphicsAllocation::isTbxWritable(uint32_t banks) const {
return isAnyBitSet(aubInfo.tbxWritable, banks);
}
// Consumes one pending host-pointer task count assignment, if there is any.
//
// When hostPtrTaskCountAssignment is non-zero, the task count stored for csr's OS
// context is compared with csr->peekTaskCount() + 1. If the latter is greater, it is
// stored as the new task count and the pending counter is decremented.
//
// csr is only dereferenced when an assignment is pending.
void GraphicsAllocation::prepareHostPtrForResidency(CommandStreamReceiver *csr) {
    if (hostPtrTaskCountAssignment == 0) {
        return;
    }

    const auto contextId = csr->getOsContext().getContextId();
    const auto storedTaskCount = getTaskCount(contextId);
    const auto upcomingTaskCount = csr->peekTaskCount() + 1;
    if (upcomingTaskCount <= storedTaskCount) {
        // The stored value is already at least as high; keep the assignment pending.
        return;
    }

    updateTaskCount(upcomingTaskCount, contextId);
    hostPtrTaskCountAssignment--;
}
// Namespace-scope definitions of the static constexpr constants that are declared
// (with their initializers) inside the GraphicsAllocation class.
constexpr uint32_t GraphicsAllocation::objectNotUsed;
constexpr uint32_t GraphicsAllocation::objectNotResident;
constexpr uint32_t GraphicsAllocation::objectAlwaysResident;

View File

@ -43,6 +43,7 @@ constexpr auto nonSharedResource = 0u;
class Gmm;
class MemoryManager;
class CommandStreamReceiver;
struct AubInfo {
uint32_t aubWritable = std::numeric_limits<uint32_t>::max();
@ -155,8 +156,8 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
// True when registeredContextsNum is greater than zero.
bool isUsed() const { return registeredContextsNum > 0; }
// True when registeredContextsNum is greater than one.
bool isUsedByManyOsContexts() const { return registeredContextsNum > 1u; }
// True when the task count stored for contextId differs from objectNotUsed.
bool isUsedByOsContext(uint32_t contextId) const { return objectNotUsed != getTaskCount(contextId); }
// NOTE(review): updateTaskCount/getTaskCount appear twice below, first without and then
// with MOCKABLE_VIRTUAL. This looks like a rendered diff (old lines followed by their
// replacements) rather than real duplicate members; confirm against the actual header.
void updateTaskCount(uint32_t newTaskCount, uint32_t contextId);
uint32_t getTaskCount(uint32_t contextId) const { return usageInfos[contextId].taskCount; }
MOCKABLE_VIRTUAL void updateTaskCount(uint32_t newTaskCount, uint32_t contextId);
// Returns the task count recorded in usageInfos for contextId.
MOCKABLE_VIRTUAL uint32_t getTaskCount(uint32_t contextId) const { return usageInfos[contextId].taskCount; }
// Stores objectNotUsed as the task count for contextId.
void releaseUsageInOsContext(uint32_t contextId) { updateTaskCount(objectNotUsed, contextId); }
// Returns the inspection id recorded in usageInfos for contextId.
uint32_t getInspectionId(uint32_t contextId) const { return usageInfos[contextId].inspectionId; }
// Overwrites the inspection id recorded in usageInfos for contextId.
void setInspectionId(uint32_t newInspectionId, uint32_t contextId) { usageInfos[contextId].inspectionId = newInspectionId; }
@ -215,6 +216,8 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
this->reservedAddressRangeInfo.rangeSize = size;
}
void prepareHostPtrForResidency(CommandStreamReceiver *csr);
// Convenience accessor: returns whatever getGmm(0u) returns.
Gmm *getDefaultGmm() const {
return getGmm(0u);
}
@ -252,6 +255,7 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
constexpr static uint32_t objectNotResident = std::numeric_limits<uint32_t>::max();
constexpr static uint32_t objectNotUsed = std::numeric_limits<uint32_t>::max();
constexpr static uint32_t objectAlwaysResident = std::numeric_limits<uint32_t>::max() - 1;
std::atomic<uint32_t> hostPtrTaskCountAssignment{0};
protected:
struct UsageInfo {

View File

@ -52,7 +52,7 @@ void InternalAllocationStorage::freeAllocationsList(uint32_t waitTaskCount, Allo
IDList<GraphicsAllocation, false, true> allocationsLeft;
while (curr != nullptr) {
auto *next = curr->next;
if (curr->getTaskCount(commandStreamReceiver.getOsContext().getContextId()) <= waitTaskCount) {
if (curr->hostPtrTaskCountAssignment == 0 && curr->getTaskCount(commandStreamReceiver.getOsContext().getContextId()) <= waitTaskCount) {
memoryManager->freeGraphicsMemory(curr);
} else {
allocationsLeft.pushTailOne(*curr);

View File

@ -9,6 +9,7 @@
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/os_interface/os_context.h"
namespace NEO {
@ -49,6 +50,7 @@ class HostPtrSurface : public Surface {
// Makes the wrapped host-pointer allocation resident on csr.
// gfxAllocation is expected to be set (DEBUG_BREAK_IF fires otherwise).
void makeResident(CommandStreamReceiver &csr) override {
DEBUG_BREAK_IF(!gfxAllocation);
// Runs before the allocation is handed to the CSR, so a pending host-pointer task
// count assignment is handled first.
gfxAllocation->prepareHostPtrForResidency(&csr);
csr.makeResidentHostPtrAllocation(gfxAllocation);
}
Surface *duplicate() override {

View File

@ -318,7 +318,10 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
// Test override: counts the call, records whether the surface allows a CPU pointer copy,
// then forwards to BaseClass::createAllocationForHostSurface().
bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override {
createAllocationForHostSurfaceCalled++;
cpuCopyForHostPtrSurfaceAllowed = surface.peekIsPtrCopyAllowed();
// NOTE(review): the bare `return BaseClass::...` below looks like the pre-change line of
// a rendered diff, with its replacement following it. Confirm against the actual file;
// taken literally it would make the remaining statements unreachable.
return BaseClass::createAllocationForHostSurface(surface, requiresL3Flush);
auto status = BaseClass::createAllocationForHostSurface(surface, requiresL3Flush);
// On success, decrement hostPtrTaskCountAssignment on the surface's allocation.
if (status)
surface.getAllocation()->hostPtrTaskCountAssignment--;
return status;
}
void ensureCommandBufferAllocation(LinearStream &commandStream, size_t minimumRequiredSize, size_t additionalAllocationSize) override {

View File

@ -15,6 +15,7 @@
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/string.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/surface.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/test/common/helpers/dispatch_flags_helper.h"
@ -151,7 +152,12 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
++hostPtrSurfaceCreationMutexLockCount;
return CommandStreamReceiver::obtainHostPtrSurfaceCreationLock();
}
// Mock override: forwards to CommandStreamReceiver::createAllocationForHostSurface() and,
// when that succeeds, decrements hostPtrTaskCountAssignment on the surface's allocation.
bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override {
    const bool created = CommandStreamReceiver::createAllocationForHostSurface(surface, requiresL3Flush);
    if (!created) {
        return false;
    }
    surface.getAllocation()->hostPtrTaskCountAssignment--;
    return true;
}
// Intentionally empty override: this mock performs no post-init flag setup.
void postInitFlagsSetup() override {}
static constexpr size_t tagSize = 256;

View File

@ -51,6 +51,20 @@ class MockGraphicsAllocation : public MemoryAllocation {
}
};
// Test double that counts calls to getTaskCount() and updateTaskCount() and then
// forwards them to MockGraphicsAllocation.
class MockGraphicsAllocationTaskCount : public MockGraphicsAllocation {
  public:
    // Number of getTaskCount() calls; static, so it is shared by every instance.
    static uint32_t getTaskCountCalleedTimes;
    // Number of updateTaskCount() calls made on this instance.
    uint32_t updateTaskCountCalleedTimes = 0;

    uint32_t getTaskCount(uint32_t contextId) const override {
        ++getTaskCountCalleedTimes;
        return MockGraphicsAllocation::getTaskCount(contextId);
    }

    void updateTaskCount(uint32_t newTaskCount, uint32_t contextId) override {
        ++updateTaskCountCalleedTimes;
        MockGraphicsAllocation::updateTaskCount(newTaskCount, contextId);
    }
};
namespace GraphicsAllocationHelper {
static inline MultiGraphicsAllocation toMultiGraphicsAllocation(GraphicsAllocation *graphicsAllocation) {

View File

@ -1502,6 +1502,50 @@ TEST_F(CreateAllocationForHostSurfaceTest, givenTemporaryAllocationWhenCreateAll
EXPECT_EQ(allocationPtr, hostSurfaceAllocationPtr);
}
// Mock CSR whose createAllocationForHostSurface() calls
// CommandStreamReceiver::createAllocationForHostSurface() directly and returns its
// result unchanged.
class MockCommandStreamReceiverHostPtrCreate : public MockCommandStreamReceiver {
  public:
    MockCommandStreamReceiverHostPtrCreate(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
        : MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {
    }

    bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override {
        const bool result = CommandStreamReceiver::createAllocationForHostSurface(surface, requiresL3Flush);
        return result;
    }
};
// Creating an allocation for a host surface must raise hostPtrTaskCountAssignment by one
// on the allocation that ends up attached to the surface.
TEST_F(CreateAllocationForHostSurfaceTest, givenTemporaryAllocationWhenCreateAllocationForHostSurfaceThenHostPtrTaskCountAssignmentWillIncrease) {
    // CSR under test: uses its own allocation storage and borrows the fixture's OS context.
    auto csrUnderTest = std::make_unique<MockCommandStreamReceiverHostPtrCreate>(executionEnvironment, 0u, device->getDeviceBitfield());
    csrUnderTest->internalAllocationStorage = std::make_unique<InternalAllocationStorage>(*csrUnderTest);
    csrUnderTest->osContext = &commandStreamReceiver->getOsContext();

    auto userPtr = reinterpret_cast<void *>(0x1234);
    size_t byteCount = 100;

    // Seed the temporary list with an allocation for the same pointer and size.
    auto seededAllocation = std::make_unique<MemoryAllocation>(0,
                                                               AllocationType::EXTERNAL_HOST_PTR, userPtr, byteCount, 0, MemoryPool::System4KBPages, MemoryManager::maxOsContextCount);
    auto *seededPtr = seededAllocation.get();
    seededAllocation->updateTaskCount(0u, 0u);
    csrUnderTest->getInternalAllocationStorage()->storeAllocation(std::move(seededAllocation), TEMPORARY_ALLOCATION);
    *csrUnderTest->getTagAddress() = 1u;

    HostPtrSurface surfaceUnderTest(userPtr, byteCount);
    uint32_t assignmentBefore = seededPtr->hostPtrTaskCountAssignment;

    csrUnderTest->createAllocationForHostSurface(surfaceUnderTest, false);

    EXPECT_EQ(assignmentBefore + 1, surfaceUnderTest.getAllocation()->hostPtrTaskCountAssignment);

    // Undo the increment so the counter returns to its previous value.
    seededPtr->hostPtrTaskCountAssignment--;
}
// After createAllocationForHostSurface() runs, the seeded temporary allocation's task
// count for context 0 is expected to read 0 instead of its previous value.
TEST_F(CreateAllocationForHostSurfaceTest, givenTemporaryAllocationWhenCreateAllocationForHostSurfaceThenAllocTaskCountEqualZero) {
    auto userPtr = reinterpret_cast<void *>(0x1234);
    size_t byteCount = 100;

    auto seededAllocation = std::make_unique<MemoryAllocation>(0,
                                                               AllocationType::EXTERNAL_HOST_PTR, userPtr, byteCount, 0, MemoryPool::System4KBPages, MemoryManager::maxOsContextCount);
    auto *seededPtr = seededAllocation.get();
    seededAllocation->updateTaskCount(10u, 0u);

    auto *allocationStorage = commandStreamReceiver->getInternalAllocationStorage();
    allocationStorage->storeAllocation(std::move(seededAllocation), TEMPORARY_ALLOCATION);
    *commandStreamReceiver->getTagAddress() = 1u;

    HostPtrSurface surfaceUnderTest(userPtr, byteCount);

    // Precondition: the seeded value is still in place.
    EXPECT_EQ(seededPtr->getTaskCount(0u), 10u);

    commandStreamReceiver->createAllocationForHostSurface(surfaceUnderTest, false);

    EXPECT_EQ(seededPtr->getTaskCount(0u), 0u);
}
TEST_F(CreateAllocationForHostSurfaceTest, whenCreatingAllocationFromHostPtrSurfaceThenLockMutex) {
const char memory[8] = {1, 2, 3, 4, 5, 6, 7, 8};
size_t size = sizeof(memory);

View File

@ -6,6 +6,7 @@
*/
#include "shared/test/common/mocks/mock_aub_csr.h"
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/test_macros/test.h"
@ -294,7 +295,7 @@ HWTEST_F(GraphicsAllocationTests, givenGraphicsAllocationThatHasPageTablesClonin
EXPECT_TRUE(aubCsr.isAubWritable(graphicsAllocation));
//modify non default bank
// modify non default bank
graphicsAllocation.setAubWritable(false, 0x2);
EXPECT_TRUE(aubCsr.isAubWritable(graphicsAllocation));
@ -399,3 +400,68 @@ HWTEST_F(GraphicsAllocationTests, givenMultiStorageGraphicsAllocationWhenTbxWrit
EXPECT_TRUE(graphicsAllocation.isTbxWritable(0b101));
EXPECT_TRUE(graphicsAllocation.isTbxWritable(0b1010));
}
// Definition of the static getTaskCount() call counter declared in
// MockGraphicsAllocationTaskCount; the tests in this file reset it to 0 themselves.
uint32_t MockGraphicsAllocationTaskCount::getTaskCountCalleedTimes = 0;
// With no pending assignment, prepareHostPtrForResidency() must not query the task
// count (and must tolerate a null CSR).
TEST(GraphicsAllocationTest, givenGraphicsAllocationWhenAssignedTaskCountEqualZeroThenPrepareForResidencyDoeNotCallGetTaskCount) {
    auto &getTaskCountCalls = MockGraphicsAllocationTaskCount::getTaskCountCalleedTimes;
    getTaskCountCalls = 0;

    MockGraphicsAllocationTaskCount allocation;
    allocation.hostPtrTaskCountAssignment = 0;
    allocation.prepareHostPtrForResidency(nullptr);

    EXPECT_EQ(getTaskCountCalls, 0u);

    // Leave the shared counter clean for whichever test runs next.
    getTaskCountCalls = 0;
}
// With one pending assignment, prepareHostPtrForResidency() must query the allocation's
// task count exactly once.
HWTEST_F(GraphicsAllocationTests, givenGraphicsAllocationWhenAssignedTaskCountAbovelZeroThenPrepareForResidencyGetTaskCountWasCalled) {
    executionEnvironment.initializeMemoryManager();
    std::unique_ptr<OsContext> context(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor()));
    MockCommandStreamReceiver csr(executionEnvironment, 0, 1);
    csr.osContext = context.get();

    auto &getTaskCountCalls = MockGraphicsAllocationTaskCount::getTaskCountCalleedTimes;
    getTaskCountCalls = 0;

    MockGraphicsAllocationTaskCount allocation;
    allocation.hostPtrTaskCountAssignment = 1;
    allocation.prepareHostPtrForResidency(&csr);

    EXPECT_EQ(getTaskCountCalls, 1u);

    // Leave the shared counter clean for whichever test runs next.
    getTaskCountCalls = 0;
}
// When the allocation's task count is already higher than the CSR's next task count,
// prepareHostPtrForResidency() must not call updateTaskCount().
HWTEST_F(GraphicsAllocationTests, givenGraphicsAllocationAllocTaskCountHigherThanInCsrThenUpdateTaskCountWasNotCalled) {
    executionEnvironment.initializeMemoryManager();
    auto osContext = std::unique_ptr<OsContext>(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor()));
    MockCommandStreamReceiver csr(executionEnvironment, 0, 1);
    csr.osContext = osContext.get();

    MockGraphicsAllocationTaskCount graphicsAllocation;
    graphicsAllocation.updateTaskCount(10u, 0u);
    // prepareHostPtrForResidency() compares against csr.peekTaskCount() + 1, not against
    // the tag value, so the precondition is set through the CSR task count (the same way
    // the "LowerThanInCsr" tests set it). 5 + 1 is below the allocation's 10.
    csr.taskCount = 5;
    graphicsAllocation.hostPtrTaskCountAssignment = 1;

    auto calledTimesBefore = graphicsAllocation.updateTaskCountCalleedTimes;
    graphicsAllocation.prepareHostPtrForResidency(&csr);

    EXPECT_EQ(graphicsAllocation.updateTaskCountCalleedTimes, calledTimesBefore);
}
// When the CSR's next task count exceeds the allocation's task count,
// prepareHostPtrForResidency() must call updateTaskCount() exactly once.
HWTEST_F(GraphicsAllocationTests, givenGraphicsAllocationAllocTaskCountLowerThanInCsrThenUpdateTaskCountWasCalled) {
    executionEnvironment.initializeMemoryManager();
    std::unique_ptr<OsContext> context(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor()));
    MockCommandStreamReceiver csr(executionEnvironment, 0, 1);
    csr.osContext = context.get();

    MockGraphicsAllocationTaskCount allocation;
    allocation.updateTaskCount(5u, 0u);
    csr.taskCount = 10;
    allocation.hostPtrTaskCountAssignment = 1;

    // Snapshot taken after the setup call above, so only the call under test is counted.
    const auto updatesBefore = allocation.updateTaskCountCalleedTimes;
    allocation.prepareHostPtrForResidency(&csr);

    EXPECT_EQ(allocation.updateTaskCountCalleedTimes, updatesBefore + 1u);
}
// When the CSR's next task count exceeds the allocation's task count,
// prepareHostPtrForResidency() must consume the pending assignment (1 -> 0).
HWTEST_F(GraphicsAllocationTests, givenGraphicsAllocationAllocTaskCountLowerThanInCsrThenAssignmentCountIsDecremented) {
    executionEnvironment.initializeMemoryManager();
    std::unique_ptr<OsContext> context(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor()));
    MockCommandStreamReceiver csr(executionEnvironment, 0, 1);
    csr.osContext = context.get();

    MockGraphicsAllocationTaskCount allocation;
    allocation.updateTaskCount(5u, 0u);
    csr.taskCount = 10;
    allocation.hostPtrTaskCountAssignment = 1;

    allocation.prepareHostPtrForResidency(&csr);

    EXPECT_EQ(allocation.hostPtrTaskCountAssignment, 0u);
}