Fall back to CPU copy when filling work partition allocation

Move some command stream receiver tests to shared.

Related-To: NEO-6325
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski 2021-10-21 11:16:19 +00:00 committed by Compute-Runtime-Automation
parent 4d6a2d0a57
commit 95610188af
17 changed files with 377 additions and 150 deletions

View File

@ -11,6 +11,7 @@ set(IGDRCL_SRCS_tests_command_stream
${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_receiver_3_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/aub_file_stream_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/aub_subcapture_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cl_command_stream_receiver_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cmd_parse_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_1_tests.cpp
@ -18,7 +19,6 @@ set(IGDRCL_SRCS_tests_command_stream
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_mt_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_1_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_2_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_flush_task_3_tests.cpp

View File

@ -0,0 +1,131 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/memory_manager/surface.h"
#include "shared/test/common/fixtures/device_fixture.h"
#include "shared/test/common/mocks/mock_csr.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h"
#include "opencl/test/unit_test/helpers/raii_hw_helper.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_hw_helper.h"
#include "test.h"
#include "gmock/gmock.h"
using namespace NEO;
// Checks that makeResident() marks a buffer's graphics allocation as resident
// for the OS context of the command stream receiver that was asked to do it.
TEST(ClCommandStreamReceiverTest, WhenMakingResidentThenBufferResidencyFlagIsSet) {
    MockContext context;
    auto csr = context.getDevice(0)->getDefaultEngine().commandStreamReceiver;

    float hostMemory[] = {1.0f};
    auto retVal = CL_INVALID_VALUE;
    auto buffer = Buffer::create(&context, CL_MEM_USE_HOST_PTR, sizeof(hostMemory), hostMemory, retVal);
    ASSERT_NE(nullptr, buffer);

    const auto rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex();
    auto bufferAllocation = buffer->getGraphicsAllocation(rootDeviceIndex);

    // Residency is tracked per OS context, so query with this CSR's context id.
    EXPECT_FALSE(bufferAllocation->isResident(csr->getOsContext().getContextId()));

    csr->makeResident(*bufferAllocation);

    EXPECT_TRUE(bufferAllocation->isResident(csr->getOsContext().getContextId()));

    delete buffer;
}
using ClCommandStreamReceiverTests = Test<DeviceFixture>;
// With a HW helper that is swapped in for the duration of the test,
// createGlobalFenceAllocation() must populate the CSR's fence allocation
// with an allocation of type GLOBAL_FENCE.
HWTEST_F(ClCommandStreamReceiverTests, givenCommandStreamReceiverWhenFenceAllocationIsRequiredAndCreateGlobalFenceAllocationIsCalledThenFenceAllocationIsAllocated) {
    // Scoped override of the HW helper for this device's render core family.
    RAIIHwHelperFactory<MockHwHelperWithFenceAllocation<FamilyType>> fenceHelperOverride{pDevice->getHardwareInfo().platform.eRenderCoreFamily};

    MockCsrHw<FamilyType> mockCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    mockCsr.setupContext(*pDevice->getDefaultEngine().osContext);

    // Nothing is allocated until explicitly requested.
    EXPECT_EQ(nullptr, mockCsr.globalFenceAllocation);

    EXPECT_TRUE(mockCsr.createGlobalFenceAllocation());

    ASSERT_NE(nullptr, mockCsr.globalFenceAllocation);
    EXPECT_EQ(GraphicsAllocation::AllocationType::GLOBAL_FENCE, mockCsr.globalFenceAllocation->getAllocationType());
}
// Same scenario as the fence-creation test, but observed through the public
// getGlobalFenceAllocation() accessor of a real CommandStreamReceiverHw.
HWTEST_F(ClCommandStreamReceiverTests, givenCommandStreamReceiverWhenGettingFenceAllocationThenCorrectFenceAllocationIsReturned) {
    // Scoped override of the HW helper for this device's render core family.
    RAIIHwHelperFactory<MockHwHelperWithFenceAllocation<FamilyType>> fenceHelperOverride{pDevice->getHardwareInfo().platform.eRenderCoreFamily};

    CommandStreamReceiverHw<FamilyType> hwCsr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    hwCsr.setupContext(*pDevice->getDefaultEngine().osContext);

    // The accessor reports no allocation before creation is requested.
    EXPECT_EQ(nullptr, hwCsr.getGlobalFenceAllocation());

    EXPECT_TRUE(hwCsr.createGlobalFenceAllocation());

    ASSERT_NE(nullptr, hwCsr.getGlobalFenceAllocation());
    EXPECT_EQ(GraphicsAllocation::AllocationType::GLOBAL_FENCE, hwCsr.getGlobalFenceAllocation()->getAllocationType());
}
using CommandStreamReceiverMultiRootDeviceTest = MultiRootDeviceFixture;
// Verifies that every allocation created on behalf of device1's command
// stream receiver carries the root device index expected by the fixture.
// Each pointer is asserted non-null before it is dereferenced so that an
// allocation failure is reported as a test failure rather than a crash.
TEST_F(CommandStreamReceiverMultiRootDeviceTest, WhenCreatingCommandStreamGraphicsAllocationsThenTheyHaveCorrectRootDeviceIndex) {
    auto commandStreamReceiver = &device1->getGpgpuCommandStreamReceiver();

    ASSERT_NE(nullptr, commandStreamReceiver);
    EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getRootDeviceIndex());

    // Linear stream / Command buffer
    GraphicsAllocation *allocation = mockMemoryManager->allocateGraphicsMemoryWithProperties({expectedRootDeviceIndex, 128u, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device1->getDeviceBitfield()});
    ASSERT_NE(nullptr, allocation);

    LinearStream commandStream{allocation};

    // A request that fits in the existing 128-byte allocation must keep it.
    commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 100u, 0u);
    EXPECT_EQ(allocation, commandStream.getGraphicsAllocation());
    EXPECT_EQ(128u, commandStream.getMaxAvailableSpace());
    EXPECT_EQ(expectedRootDeviceIndex, commandStream.getGraphicsAllocation()->getRootDeviceIndex());

    // A request that does not fit must swap in a different allocation.
    commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 1024u, 0u);
    ASSERT_NE(nullptr, commandStream.getGraphicsAllocation());
    EXPECT_NE(allocation, commandStream.getGraphicsAllocation());
    EXPECT_EQ(0u, commandStream.getMaxAvailableSpace() % MemoryConstants::pageSize64k);
    EXPECT_EQ(expectedRootDeviceIndex, commandStream.getGraphicsAllocation()->getRootDeviceIndex());
    mockMemoryManager->freeGraphicsMemory(commandStream.getGraphicsAllocation());

    // Debug surface
    auto debugSurface = commandStreamReceiver->allocateDebugSurface(MemoryConstants::pageSize);
    ASSERT_NE(nullptr, debugSurface);
    EXPECT_EQ(expectedRootDeviceIndex, debugSurface->getRootDeviceIndex());

    // Indirect heaps
    IndirectHeap::Type heapTypes[]{IndirectHeap::DYNAMIC_STATE, IndirectHeap::INDIRECT_OBJECT, IndirectHeap::SURFACE_STATE};
    for (auto heapType : heapTypes) {
        IndirectHeap *heap = nullptr;
        commandStreamReceiver->allocateHeapMemory(heapType, MemoryConstants::pageSize, heap);
        ASSERT_NE(nullptr, heap);
        ASSERT_NE(nullptr, heap->getGraphicsAllocation());
        EXPECT_EQ(expectedRootDeviceIndex, heap->getGraphicsAllocation()->getRootDeviceIndex());
        mockMemoryManager->freeGraphicsMemory(heap->getGraphicsAllocation());
        delete heap;
    }

    // Tag allocation
    ASSERT_NE(nullptr, commandStreamReceiver->getTagAllocation());
    EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getTagAllocation()->getRootDeviceIndex());

    // Preemption allocation
    if (nullptr == commandStreamReceiver->getPreemptionAllocation()) {
        commandStreamReceiver->createPreemptionAllocation();
    }
    // Creation may fail; do not dereference a missing allocation.
    ASSERT_NE(nullptr, commandStreamReceiver->getPreemptionAllocation());
    EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getPreemptionAllocation()->getRootDeviceIndex());

    // HostPtr surface
    char memory[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    HostPtrSurface surface(memory, sizeof(memory), true);
    EXPECT_TRUE(commandStreamReceiver->createAllocationForHostSurface(surface, false));
    ASSERT_NE(nullptr, surface.getAllocation());
    EXPECT_EQ(expectedRootDeviceIndex, surface.getAllocation()->getRootDeviceIndex());
}

View File

@ -763,6 +763,7 @@ TEST_F(DrmMemoryManagerCopyMemoryToAllocationTest, givenDrmMemoryManagerWhenCopy
allocData.flags.allocateMemory = true;
allocData.type = GraphicsAllocation::AllocationType::KERNEL_ISA;
allocData.rootDeviceIndex = rootDeviceIndex;
allocData.storageInfo.memoryBanks.set(0, true);
MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success;
auto allocation = drmMemoryManger.allocateGraphicsMemoryInDevicePool(allocData, status);
ASSERT_NE(nullptr, allocation);
@ -785,6 +786,7 @@ TEST_F(DrmMemoryManagerCopyMemoryToAllocationTest, givenDrmMemoryManagerWhenCopy
allocData.flags.allocateMemory = true;
allocData.type = GraphicsAllocation::AllocationType::KERNEL_ISA;
allocData.rootDeviceIndex = rootDeviceIndex;
allocData.storageInfo.memoryBanks.set(0, true);
MemoryManager::AllocationStatus status = MemoryManager::AllocationStatus::Success;
auto allocation = drmMemoryManger.allocateGraphicsMemoryInDevicePool(allocData, status);
ASSERT_NE(nullptr, allocation);

View File

@ -5418,4 +5418,61 @@ TEST_F(DrmMemoryManagerWithLocalMemoryAndExplicitExpectationsTest, givenOversize
memoryManager->freeGraphicsMemory(allocation);
}
// DrmMemoryManager test double whose "locked local memory" is a plain heap
// buffer per buffer object, indexed by the BO handle. Tests can inspect
// lockedLocalMemory afterwards to see which BOs were locked and what was
// written into them.
struct DrmMemoryManagerToTestCopyMemoryToAllocationBanks : public DrmMemoryManager {
    // lockableLocalMemorySize is the size of each buffer handed out by a lock;
    // zero makes every lock attempt fail.
    DrmMemoryManagerToTestCopyMemoryToAllocationBanks(ExecutionEnvironment &executionEnvironment, size_t lockableLocalMemorySize)
        : DrmMemoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, false, false, executionEnvironment),
          lockedLocalMemorySize(lockableLocalMemorySize) {
    }

    // Returns a freshly allocated buffer for the BO, or nullptr when locking is
    // disabled or the BO handle does not map to a slot in lockedLocalMemory.
    void *lockResourceInLocalMemoryImpl(BufferObject *bo) override {
        if (lockedLocalMemorySize == 0) {
            return nullptr;
        }
        const auto slot = static_cast<uint32_t>(bo->peekHandle());
        if (slot >= lockedLocalMemory.size()) {
            return nullptr;
        }
        auto &storage = lockedLocalMemory[slot];
        storage.reset(new uint8_t[lockedLocalMemorySize]);
        return storage.get();
    }

    // Intentionally a no-op: the buffers stay alive so tests can read them back.
    void unlockResourceInLocalMemoryImpl(BufferObject *bo) override {
    }

    std::array<std::unique_ptr<uint8_t[]>, 4> lockedLocalMemory;
    size_t lockedLocalMemorySize = 0;
};
// Copies data into banks 1 and 3 of a four-bank local-memory allocation and
// checks that only those banks were locked and that they hold the copied data
// at the requested offset.
TEST(DrmMemoryManagerCopyMemoryToAllocationBanksTest, givenDrmMemoryManagerWhenCopyMemoryToAllocationOnSpecificMemoryBanksThenAllocationIsFilledWithCorrectDataOnSpecificBanks) {
    constexpr size_t sourceAllocationSize = 64u;
    constexpr size_t offset = 3u;
    constexpr size_t destinationAllocationSize = sourceAllocationSize + offset;

    MockExecutionEnvironment executionEnvironment;
    auto drm = new DrmMock(mockFd, *executionEnvironment.rootDeviceEnvironments[0]);
    executionEnvironment.rootDeviceEnvironments[0]->osInterface.reset(new OSInterface());
    // Ownership of drm is handed to the OS interface here.
    executionEnvironment.rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr<DriverModel>(drm));

    DrmMemoryManagerToTestCopyMemoryToAllocationBanks drmMemoryManager(executionEnvironment, destinationAllocationSize);
    std::vector<uint8_t> dataToCopy(sourceAllocationSize, 1u);

    MockDrmAllocation mockAllocation(GraphicsAllocation::AllocationType::WORK_PARTITION_SURFACE, MemoryPool::LocalMemory);
    mockAllocation.storageInfo.memoryBanks = 0b1111;
    DeviceBitfield memoryBanksToCopy = 0b1010;
    mockAllocation.bufferObjects.clear();

    // The buffer objects are owned here so they are released even when one of
    // the fatal ASSERT_* checks below returns from the test early. Declared
    // after mockAllocation so they are destroyed before it.
    std::vector<std::unique_ptr<BufferObject>> ownedBufferObjects;
    ownedBufferObjects.reserve(4);
    for (auto index = 0u; index < 4; index++) {
        drmMemoryManager.lockedLocalMemory[index].reset();
        ownedBufferObjects.push_back(std::make_unique<BufferObject>(drm, index, sourceAllocationSize, 3));
        mockAllocation.bufferObjects.push_back(ownedBufferObjects.back().get());
    }

    auto ret = drmMemoryManager.copyMemoryToAllocationBanks(&mockAllocation, offset, dataToCopy.data(), dataToCopy.size(), memoryBanksToCopy);
    EXPECT_TRUE(ret);

    // Only the banks selected by the 0b1010 mask may have been locked.
    EXPECT_EQ(nullptr, drmMemoryManager.lockedLocalMemory[0].get());
    ASSERT_NE(nullptr, drmMemoryManager.lockedLocalMemory[1].get());
    EXPECT_EQ(nullptr, drmMemoryManager.lockedLocalMemory[2].get());
    ASSERT_NE(nullptr, drmMemoryManager.lockedLocalMemory[3].get());

    EXPECT_EQ(0, memcmp(ptrOffset(drmMemoryManager.lockedLocalMemory[1].get(), offset), dataToCopy.data(), dataToCopy.size()));
    EXPECT_EQ(0, memcmp(ptrOffset(drmMemoryManager.lockedLocalMemory[3].get(), offset), dataToCopy.data(), dataToCopy.size()));
}
} // namespace NEO

View File

@ -2497,3 +2497,52 @@ TEST_F(WddmMemoryManagerSimpleTest, whenAlignmentRequirementExceedsPageSizeThenA
EXPECT_EQ(0U, memoryManager.callCount.allocateGraphicsMemoryUsingKmdAndMapItToCpuVA);
}
}
// WddmMock variant whose lockResource() hands out per-handle scratch storage
// and records which of the first four handles were locked.
struct WddmWithMockedLock : public WddmMock {
    using WddmMock::WddmMock;

    // Returns the storage slot for the handle, or nullptr when the handle does
    // not index a slot in storages (previously an out-of-bounds access).
    void *lockResource(const D3DKMT_HANDLE &handle, bool applyMakeResidentPriorToLock, size_t size) override {
        constexpr size_t storageCount = sizeof(storages) / sizeof(storages[0]);
        if (handle >= storageCount) {
            return nullptr;
        }
        if (handle < storageLocked.size()) {
            storageLocked.set(handle);
        }
        return storages[handle];
    }

    // Bit i is set once handle i has been locked.
    std::bitset<4> storageLocked{};
    // One 64 KB zero-initialised slot per handle.
    uint8_t storages[EngineLimits::maxHandleCount][MemoryConstants::pageSize64k] = {0u};
};
// Copies data into banks 1 and 3 of a four-handle allocation and checks that
// only those handles were locked and that their storage holds the data at the
// requested offset.
TEST(WddmMemoryManagerCopyMemoryToAllocationBanksTest, givenAllocationWithMultiTilePlacementWhenCopyDataSpecificMemoryBanksThenLockOnlySpecificStorages) {
    uint8_t sourceData[32]{};
    size_t offset = 3;
    size_t sourceAllocationSize = sizeof(sourceData);

    auto hwInfo = *defaultHwInfo;
    hwInfo.featureTable.ftrLocalMemory = true;

    MockExecutionEnvironment executionEnvironment(&hwInfo);
    executionEnvironment.initGmm();
    auto wddm = new WddmWithMockedLock(*executionEnvironment.rootDeviceEnvironments[0]);
    wddm->init();
    MemoryManagerCreate<WddmMemoryManager> memoryManager(true, true, executionEnvironment);

    MockWddmAllocation mockAllocation(executionEnvironment.rootDeviceEnvironments[0]->getGmmClientContext());
    mockAllocation.storageInfo.memoryBanks = 0b1111;
    DeviceBitfield memoryBanksToCopy = 0b1010;
    mockAllocation.handles.resize(4);

    // Use the bank number as the handle so lockResource() can map it to a slot.
    for (auto bank = 0u; bank < 4; bank++) {
        wddm->storageLocked.set(bank, false);
        if (!mockAllocation.storageInfo.memoryBanks.test(bank)) {
            continue;
        }
        mockAllocation.handles[bank] = bank;
    }

    std::vector<uint8_t> dataToCopy(sourceAllocationSize, 1u);
    auto copied = memoryManager.copyMemoryToAllocationBanks(&mockAllocation, offset, dataToCopy.data(), dataToCopy.size(), memoryBanksToCopy);
    EXPECT_TRUE(copied);

    // Only the handles selected by the 0b1010 mask may have been locked.
    EXPECT_FALSE(wddm->storageLocked.test(0));
    ASSERT_TRUE(wddm->storageLocked.test(1));
    EXPECT_FALSE(wddm->storageLocked.test(2));
    ASSERT_TRUE(wddm->storageLocked.test(3));

    EXPECT_EQ(0, memcmp(ptrOffset(wddm->storages[1], offset), dataToCopy.data(), dataToCopy.size()));
    EXPECT_EQ(0, memcmp(ptrOffset(wddm->storages[3], offset), dataToCopy.data(), dataToCopy.size()));
}

View File

@ -578,12 +578,11 @@ bool CommandStreamReceiver::createWorkPartitionAllocation(const Device &device)
}
const uint32_t copySrc = deviceIndex;
const Vec3<size_t> copySrcSize = {sizeof(copySrc), 1, 1};
DeviceBitfield copyBitfield{};
copyBitfield.set(deviceIndex);
BlitOperationResult blitResult = BlitHelper::blitMemoryToAllocationBanks(device, workPartitionAllocation, 0, &copySrc, copySrcSize, copyBitfield);
auto copySuccess = MemoryTransferHelper::transferMemoryToAllocationBanks(device, workPartitionAllocation, 0, &copySrc, sizeof(copySrc), copyBitfield);
if (blitResult != BlitOperationResult::Success) {
if (!copySuccess) {
return false;
}
}

View File

@ -691,6 +691,11 @@ bool MemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocatio
return true;
}
// Generic CPU copy into an allocation through its CPU-visible pointer.
// handleMask is not used here: this base implementation writes once through
// getUnderlyingBuffer(); OS-specific memory managers override it to address
// individual banks.
// Returns false when nothing could be copied: no CPU pointer, a destination
// offset past the end of the buffer, or memcpy_s reporting an error.
bool MemoryManager::copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield handleMask) {
    auto cpuBase = static_cast<uint8_t *>(graphicsAllocation->getUnderlyingBuffer());
    if (cpuBase == nullptr) {
        return false;
    }

    const size_t bufferSize = graphicsAllocation->getUnderlyingBufferSize();
    if (destinationOffset > bufferSize) {
        // Guards the unsigned subtraction below against wrap-around.
        return false;
    }

    return memcpy_s(ptrOffset(cpuBase, destinationOffset), bufferSize - destinationOffset, memoryToCopy, sizeToCopy) == 0;
}
void MemoryManager::waitForEnginesCompletion(GraphicsAllocation &graphicsAllocation) {
for (auto &engine : getRegisteredEngines()) {
auto osContextId = engine.osContext->getContextId();
@ -867,4 +872,13 @@ bool MemoryTransferHelper::transferMemoryToAllocation(bool useBlitter, const Dev
}
return device.getMemoryManager()->copyMemoryToAllocation(dstAllocation, dstOffset, srcMemory, srcSize);
}
// Writes srcSize bytes from srcMemory into the selected banks of dstAllocation.
// The blitter is tried first; if it does not report success, the copy is
// retried through the memory manager's CPU path and that result is returned.
bool MemoryTransferHelper::transferMemoryToAllocationBanks(const Device &device, GraphicsAllocation *dstAllocation, size_t dstOffset, const void *srcMemory,
                                                           size_t srcSize, DeviceBitfield dstMemoryBanks) {
    const auto blitResult = BlitHelper::blitMemoryToAllocationBanks(device, dstAllocation, dstOffset, srcMemory, {srcSize, 1, 1}, dstMemoryBanks);
    if (blitResult == BlitOperationResult::Success) {
        return true;
    }

    // Blit did not succeed: fall back to a CPU copy.
    return device.getMemoryManager()->copyMemoryToAllocationBanks(dstAllocation, dstOffset, srcMemory, srcSize, dstMemoryBanks);
}
} // namespace NEO

View File

@ -56,7 +56,9 @@ constexpr size_t paddingBufferSize = 2 * MemoryConstants::megaByte;
namespace MemoryTransferHelper {
bool transferMemoryToAllocation(bool useBlitter, const Device &device, GraphicsAllocation *dstAllocation, size_t dstOffset, const void *srcMemory, size_t srcSize);
}
bool transferMemoryToAllocationBanks(const Device &device, GraphicsAllocation *dstAllocation, size_t dstOffset, const void *srcMemory,
size_t srcSize, DeviceBitfield dstMemoryBanks);
} // namespace MemoryTransferHelper
class MemoryManager {
public:
@ -191,6 +193,7 @@ class MemoryManager {
HostPtrManager *getHostPtrManager() const { return hostPtrManager.get(); }
void setDefaultEngineIndex(uint32_t rootDeviceIndex, uint32_t engineIndex) { defaultEngineIndex[rootDeviceIndex] = engineIndex; }
virtual bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy);
virtual bool copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield handleMask);
HeapIndex selectHeap(const GraphicsAllocation *allocation, bool hasPointer, bool isFullRangeSVM, bool useFrontWindow);
static std::unique_ptr<MemoryManager> createMemoryManager(ExecutionEnvironment &executionEnvironment, DriverModelType driverModel = DriverModelType::UNKNOWN);
virtual void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) { return nullptr; };

View File

@ -1111,8 +1111,17 @@ bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAlloca
if (graphicsAllocation->getUnderlyingBuffer() || !isLocalMemorySupported(graphicsAllocation->getRootDeviceIndex())) {
return MemoryManager::copyMemoryToAllocation(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy);
}
return copyMemoryToAllocationBanks(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy, maxNBitValue(graphicsAllocation->storageInfo.getNumBanks()));
}
bool DrmMemoryManager::copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield handleMask) {
if (MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool())) {
return false;
}
auto drmAllocation = static_cast<DrmAllocation *>(graphicsAllocation);
for (auto handleId = 0u; handleId < graphicsAllocation->storageInfo.getNumBanks(); handleId++) {
if (!handleMask.test(handleId)) {
continue;
}
auto ptr = lockResourceInLocalMemoryImpl(drmAllocation->getBOs()[handleId]);
if (!ptr) {
return false;

View File

@ -57,6 +57,7 @@ class DrmMemoryManager : public MemoryManager {
DrmGemCloseWorker *peekGemCloseWorker() const { return this->gemCloseWorker.get(); }
bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) override;
bool copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield handleMask) override;
MOCKABLE_VIRTUAL int obtainFdFromHandle(int boHandle, uint32_t rootDeviceindex);
AddressRange reserveGpuAddress(size_t size, uint32_t rootDeviceIndex) override;

View File

@ -18,6 +18,7 @@
#include "shared/source/helpers/heap_assigner.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/helpers/string.h"
#include "shared/source/helpers/surface_format_info.h"
#include "shared/source/memory_manager/deferrable_deletion.h"
#include "shared/source/memory_manager/deferred_deleter.h"
@ -857,4 +858,31 @@ bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) {
return inputPointerReadDelta > slownessFactor * fastestLocalRead;
}
// Copies host data into an allocation. Allocations without a CPU pointer are
// written bank by bank, with every bank of the allocation selected; all others
// go through the generic MemoryManager implementation.
bool WddmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) {
    const bool hasCpuPointer = graphicsAllocation->getUnderlyingBuffer() != nullptr;
    if (!hasCpuPointer) {
        // Mask with one bit set for each bank of the allocation.
        return copyMemoryToAllocationBanks(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy,
                                           maxNBitValue(graphicsAllocation->storageInfo.getNumBanks()));
    }
    return MemoryManager::copyMemoryToAllocation(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy);
}
// Copies host data into each bank of the allocation whose bit is set in
// handleMask, locking and unlocking that bank's handle around the copy.
// Returns false for system-memory allocations and as soon as a lock fails;
// banks written before a failed lock are left as they are.
bool WddmMemoryManager::copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield handleMask) {
    const bool residesInSystemMemory = MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool());
    if (residesInSystemMemory) {
        return false;
    }

    auto wddmAllocation = static_cast<WddmAllocation *>(graphicsAllocation);
    auto &wddm = getWddm(graphicsAllocation->getRootDeviceIndex());

    for (auto bank = 0u; bank < graphicsAllocation->storageInfo.getNumBanks(); ++bank) {
        if (handleMask.test(bank)) {
            const auto bankHandle = wddmAllocation->getHandles()[bank];

            auto lockedPtr = wddm.lockResource(bankHandle, wddmAllocation->needsMakeResidentBeforeLock, wddmAllocation->getAlignedSize());
            if (!lockedPtr) {
                return false;
            }

            memcpy_s(ptrOffset(lockedPtr, destinationOffset), graphicsAllocation->getUnderlyingBufferSize() - destinationOffset, memoryToCopy, sizeToCopy);
            wddm.unlockResource(bankHandle);
        }
    }
    return true;
}
} // namespace NEO

View File

@ -65,6 +65,7 @@ class WddmMemoryManager : public MemoryManager {
AlignedMallocRestrictions *getAlignedMallocRestrictions() override;
bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) override;
bool copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield handleMask) override;
void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) override;
void releaseReservedCpuAddressRange(void *reserved, size_t size, uint32_t rootDeviceIndex) override;
bool isCpuCopyRequired(const void *ptr) override;

View File

@ -13,9 +13,6 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryInDevicePool(const
status = AllocationStatus::RetryInNonDevicePool;
return nullptr;
}
// Forwards unchanged to the generic MemoryManager::copyMemoryToAllocation().
bool WddmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) {
return MemoryManager::copyMemoryToAllocation(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy);
}
bool WddmMemoryManager::mapGpuVirtualAddress(WddmAllocation *allocation, const void *requiredPtr) {
if (allocation->getNumGmms() > 1) {
return mapMultiHandleAllocationWithRetry(allocation, requiredPtr);

View File

@ -157,6 +157,12 @@ GraphicsAllocation *MockMemoryManager::createGraphicsAllocationFromExistingStora
return allocation;
}
// Records the call (counter plus a copy of every argument) so tests can
// inspect it, then performs the real copy through OsAgnosticMemoryManager.
bool MockMemoryManager::copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield handleMask) {
    copyMemoryToAllocationBanksCalled++;
    copyMemoryToAllocationBanksParamsPassed.push_back({graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy, handleMask});
    return OsAgnosticMemoryManager::copyMemoryToAllocationBanks(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy, handleMask);
}
// Builds a MockMemoryManager on the given execution environment and stores
// failedAllocationsCount in the member of the same name.
FailMemoryManager::FailMemoryManager(int32_t failedAllocationsCount, ExecutionEnvironment &executionEnvironment) : MockMemoryManager(executionEnvironment) {
// The parameter shadows the member, hence the explicit this->.
this->failedAllocationsCount = failedAllocationsCount;
}

View File

@ -149,6 +149,18 @@ class MockMemoryManager : public MemoryManagerCreate<OsAgnosticMemoryManager> {
return MemoryManager::setMemAdvise(gfxAllocation, flags, rootDeviceIndex);
}
// Snapshot of the arguments of one copyMemoryToAllocationBanks() call, in
// parameter order.
struct CopyMemoryToAllocationBanksParams {
    GraphicsAllocation *graphicsAllocation{nullptr}; // destination allocation
    size_t destinationOffset{0u};                    // byte offset into the destination
    const void *memoryToCopy{nullptr};               // source pointer
    size_t sizeToCopy{0u};                           // number of bytes to copy
    DeviceBitfield handleMask{};                     // banks selected for the copy
};
StackVec<CopyMemoryToAllocationBanksParams, 2> copyMemoryToAllocationBanksParamsPassed{};
bool copyMemoryToAllocationBanks(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy, DeviceBitfield handleMask) override;
uint32_t copyMemoryToAllocationBanksCalled = 0u;
uint32_t freeGraphicsMemoryCalled = 0u;
uint32_t unlockResourceCalled = 0u;
uint32_t lockResourceCalled = 0u;

View File

@ -6,6 +6,7 @@
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}stream_properties_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/stream_properties_tests_common.cpp
${CMAKE_CURRENT_SOURCE_DIR}/stream_properties_tests_common.h

View File

@ -1,60 +1,41 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/command_stream/command_stream_receiver_simulated_hw.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/command_stream/scratch_space_controller.h"
#include "shared/source/gmm_helper/page_table_mngr.h"
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/timestamp_packet.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/surface.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/utilities/tag_allocator.h"
#include "shared/test/common/fixtures/device_fixture.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/mocks/mock_allocation_properties.h"
#include "shared/test/common/mocks/mock_builtins.h"
#include "shared/test/common/mocks/mock_csr.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/matchers.h"
#include "shared/test/common/test_macros/test_checks_shared.h"
#include "shared/test/unit_test/direct_submission/direct_submission_controller_mock.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/platform/platform.h"
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
#include "opencl/test/unit_test/fixtures/multi_root_device_fixture.h"
#include "opencl/test/unit_test/helpers/raii_hw_helper.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "opencl/test/unit_test/mocks/mock_hw_helper.h"
#include "opencl/test/unit_test/mocks/mock_platform.h"
#include "opencl/test/unit_test/mocks/mock_program.h"
#include "test.h"
#include "gmock/gmock.h"
using namespace NEO;
struct CommandStreamReceiverTest : public ClDeviceFixture,
struct CommandStreamReceiverTest : public DeviceFixture,
public ::testing::Test {
void SetUp() override {
ClDeviceFixture::SetUp();
DeviceFixture::SetUp();
commandStreamReceiver = &pDevice->getGpgpuCommandStreamReceiver();
ASSERT_NE(nullptr, commandStreamReceiver);
@ -63,7 +44,7 @@ struct CommandStreamReceiverTest : public ClDeviceFixture,
}
void TearDown() override {
ClDeviceFixture::TearDown();
DeviceFixture::TearDown();
}
CommandStreamReceiver *commandStreamReceiver;
@ -106,29 +87,6 @@ HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenFlagsAreSetCorrectly) {
EXPECT_FALSE(csr.lastSentUseGlobalAtomics);
}
// Checks that makeResident() on the fixture's command stream receiver marks a
// buffer's graphics allocation as resident for that receiver's OS context.
TEST_F(CommandStreamReceiverTest, WhenMakingResidentThenBufferResidencyFlagIsSet) {
    MockContext context;

    float hostMemory[] = {1.0f};
    auto retVal = CL_INVALID_VALUE;
    auto buffer = Buffer::create(&context, CL_MEM_USE_HOST_PTR, sizeof(hostMemory), hostMemory, retVal);
    ASSERT_NE(nullptr, buffer);

    const auto rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex();
    auto bufferAllocation = buffer->getGraphicsAllocation(rootDeviceIndex);

    // Residency is tracked per OS context, so query with this CSR's context id.
    EXPECT_FALSE(bufferAllocation->isResident(commandStreamReceiver->getOsContext().getContextId()));

    commandStreamReceiver->makeResident(*bufferAllocation);

    EXPECT_TRUE(bufferAllocation->isResident(commandStreamReceiver->getOsContext().getContextId()));

    delete buffer;
}
TEST_F(CommandStreamReceiverTest, givenBaseDownloadAllocationCalledThenDoesNotChangeAnything) {
auto *memoryManager = commandStreamReceiver->getMemoryManager();
@ -881,32 +839,6 @@ TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenInitializeTa
}
}
// With a HW helper that is swapped in for the duration of the test,
// createGlobalFenceAllocation() must populate the CSR's fence allocation
// with an allocation of type GLOBAL_FENCE.
HWTEST_F(CommandStreamReceiverTest, givenCommandStreamReceiverWhenFenceAllocationIsRequiredAndCreateGlobalFenceAllocationIsCalledThenFenceAllocationIsAllocated) {
    // Scoped override of the HW helper for this device's render core family.
    RAIIHwHelperFactory<MockHwHelperWithFenceAllocation<FamilyType>> scopedHwHelper{pDevice->getHardwareInfo().platform.eRenderCoreFamily};

    MockCsrHw<FamilyType> csrUnderTest(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    csrUnderTest.setupContext(*pDevice->getDefaultEngine().osContext);

    // Nothing is allocated until explicitly requested.
    EXPECT_EQ(nullptr, csrUnderTest.globalFenceAllocation);

    EXPECT_TRUE(csrUnderTest.createGlobalFenceAllocation());

    ASSERT_NE(nullptr, csrUnderTest.globalFenceAllocation);
    EXPECT_EQ(GraphicsAllocation::AllocationType::GLOBAL_FENCE, csrUnderTest.globalFenceAllocation->getAllocationType());
}
// Same scenario as the fence-creation test, but observed through the public
// getGlobalFenceAllocation() accessor of a real CommandStreamReceiverHw.
HWTEST_F(CommandStreamReceiverTest, givenCommandStreamReceiverWhenGettingFenceAllocationThenCorrectFenceAllocationIsReturned) {
    // Scoped override of the HW helper for this device's render core family.
    RAIIHwHelperFactory<MockHwHelperWithFenceAllocation<FamilyType>> scopedHwHelper{pDevice->getHardwareInfo().platform.eRenderCoreFamily};

    CommandStreamReceiverHw<FamilyType> csrUnderTest(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    csrUnderTest.setupContext(*pDevice->getDefaultEngine().osContext);

    // The accessor reports no allocation before creation is requested.
    EXPECT_EQ(nullptr, csrUnderTest.getGlobalFenceAllocation());

    EXPECT_TRUE(csrUnderTest.createGlobalFenceAllocation());

    ASSERT_NE(nullptr, csrUnderTest.getGlobalFenceAllocation());
    EXPECT_EQ(GraphicsAllocation::AllocationType::GLOBAL_FENCE, csrUnderTest.getGlobalFenceAllocation()->getAllocationType());
}
TEST(CommandStreamReceiverSimpleTest, givenNullHardwareDebugModeWhenInitializeTagAllocationIsCalledThenTagAllocationIsBeingAllocatedAndinitialValueIsMinusOne) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableNullHardware.set(true);
@ -1129,9 +1061,7 @@ TEST(CommandStreamReceiverSimpleTest, givenMultipleActivePartitionsWhenWaitingFo
}
TEST(CommandStreamReceiverMultiContextTests, givenMultipleCsrsWhenSameResourcesAreUsedThenResidencyIsProperlyHandled) {
auto executionEnvironment = platform()->peekExecutionEnvironment();
std::unique_ptr<MockDevice> device(Device::create<MockDevice>(executionEnvironment, 0u));
std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get(), 0u));
auto &commandStreamReceiver0 = *device->commandStreamReceivers[0];
auto &commandStreamReceiver1 = *device->commandStreamReceivers[1];
@ -1166,16 +1096,14 @@ TEST(CommandStreamReceiverMultiContextTests, givenMultipleCsrsWhenSameResourcesA
struct CreateAllocationForHostSurfaceTest : public ::testing::Test {
void SetUp() override {
executionEnvironment = platform()->peekExecutionEnvironment();
executionEnvironment->prepareRootDeviceEnvironments(1u);
executionEnvironment->rootDeviceEnvironments[0]->setHwInfo(&hwInfo);
gmockMemoryManager = new ::testing::NiceMock<GMockMemoryManager>(*executionEnvironment);
executionEnvironment->memoryManager.reset(gmockMemoryManager);
device.reset(MockDevice::create<MockDevice>(executionEnvironment, 0u));
executionEnvironment.incRefInternal();
gmockMemoryManager = new ::testing::NiceMock<GMockMemoryManager>(executionEnvironment);
executionEnvironment.memoryManager.reset(gmockMemoryManager);
device.reset(MockDevice::createWithExecutionEnvironment<MockDevice>(&hwInfo, &executionEnvironment, 0u));
commandStreamReceiver = &device->getGpgpuCommandStreamReceiver();
}
MockExecutionEnvironment executionEnvironment;
HardwareInfo hwInfo = *defaultHwInfo;
ExecutionEnvironment *executionEnvironment = nullptr;
GMockMemoryManager *gmockMemoryManager = nullptr;
std::unique_ptr<MockDevice> device;
CommandStreamReceiver *commandStreamReceiver = nullptr;
@ -1383,6 +1311,7 @@ HWTEST_F(CommandStreamReceiverTest, whenCreatingCommandStreamReceiverThenLastAdd
HWTEST_F(CommandStreamReceiverTest, givenDebugFlagWhenCreatingCsrThenSetEnableStaticPartitioningAccordingly) {
DebugManagerStateRestore restore{};
VariableBackup<bool> backup(&ImplicitScaling::apiSupport, true);
{
UltDeviceFactory deviceFactory{1, 2};
@ -1548,64 +1477,6 @@ HWTEST_F(SimulatedCommandStreamReceiverTest, givenOsContextWithNoDeviceBitfieldW
EXPECT_EQ(0u, csr.getDeviceIndex());
}
using CommandStreamReceiverMultiRootDeviceTest = MultiRootDeviceFixture;

// Checks that each kind of allocation obtained through the GPGPU command stream
// receiver of `device1` (command buffer, debug surface, indirect heaps, tag,
// preemption and host-pointer surface allocations) reports the fixture's
// `expectedRootDeviceIndex`.
TEST_F(CommandStreamReceiverMultiRootDeviceTest, WhenCreatingCommandStreamGraphicsAllocationsThenTheyHaveCorrectRootDeviceIndex) {
    auto commandStreamReceiver = &device1->getGpgpuCommandStreamReceiver();

    ASSERT_NE(nullptr, commandStreamReceiver);
    EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getRootDeviceIndex());

    // Linear stream / Command buffer
    GraphicsAllocation *allocation = mockMemoryManager->allocateGraphicsMemoryWithProperties({expectedRootDeviceIndex, 128u, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device1->getDeviceBitfield()});
    // Fatal check: the stream built on top of this allocation is dereferenced below.
    ASSERT_NE(nullptr, allocation);
    LinearStream commandStream{allocation};

    // 100 bytes fit into the 128-byte allocation, so the stream is expected to keep it.
    commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 100u, 0u);
    EXPECT_EQ(allocation, commandStream.getGraphicsAllocation());
    EXPECT_EQ(128u, commandStream.getMaxAvailableSpace());
    EXPECT_EQ(expectedRootDeviceIndex, commandStream.getGraphicsAllocation()->getRootDeviceIndex());

    // 1024 bytes do not fit, so the stream is expected to switch to a different allocation.
    commandStreamReceiver->ensureCommandBufferAllocation(commandStream, 1024u, 0u);
    EXPECT_NE(allocation, commandStream.getGraphicsAllocation());
    EXPECT_EQ(0u, commandStream.getMaxAvailableSpace() % MemoryConstants::pageSize64k);
    EXPECT_EQ(expectedRootDeviceIndex, commandStream.getGraphicsAllocation()->getRootDeviceIndex());
    mockMemoryManager->freeGraphicsMemory(commandStream.getGraphicsAllocation());

    // Debug surface
    auto debugSurface = commandStreamReceiver->allocateDebugSurface(MemoryConstants::pageSize);
    ASSERT_NE(nullptr, debugSurface);
    EXPECT_EQ(expectedRootDeviceIndex, debugSurface->getRootDeviceIndex());

    // Indirect heaps
    IndirectHeap::Type heapTypes[]{IndirectHeap::DYNAMIC_STATE, IndirectHeap::INDIRECT_OBJECT, IndirectHeap::SURFACE_STATE};
    for (auto heapType : heapTypes) {
        IndirectHeap *heap = nullptr;
        commandStreamReceiver->allocateHeapMemory(heapType, MemoryConstants::pageSize, heap);
        ASSERT_NE(nullptr, heap);
        ASSERT_NE(nullptr, heap->getGraphicsAllocation());
        EXPECT_EQ(expectedRootDeviceIndex, heap->getGraphicsAllocation()->getRootDeviceIndex());
        // The test releases both the backing allocation and the heap object itself.
        mockMemoryManager->freeGraphicsMemory(heap->getGraphicsAllocation());
        delete heap;
    }

    // Tag allocation
    ASSERT_NE(nullptr, commandStreamReceiver->getTagAllocation());
    EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getTagAllocation()->getRootDeviceIndex());

    // Preemption allocation
    if (nullptr == commandStreamReceiver->getPreemptionAllocation()) {
        commandStreamReceiver->createPreemptionAllocation();
    }
    // Guard the dereference below the same way the other allocations are guarded.
    ASSERT_NE(nullptr, commandStreamReceiver->getPreemptionAllocation());
    EXPECT_EQ(expectedRootDeviceIndex, commandStreamReceiver->getPreemptionAllocation()->getRootDeviceIndex());

    // HostPtr surface
    char memory[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    HostPtrSurface surface(memory, sizeof(memory), true);
    EXPECT_TRUE(commandStreamReceiver->createAllocationForHostSurface(surface, false));
    ASSERT_NE(nullptr, surface.getAllocation());
    EXPECT_EQ(expectedRootDeviceIndex, surface.getAllocation()->getRootDeviceIndex());
}
using CommandStreamReceiverPageTableManagerTest = ::testing::Test;
TEST_F(CommandStreamReceiverPageTableManagerTest, givenExistingPageTableManagerWhenNeedsPageTableManagerIsCalledThenFalseIsReturned) {
@ -1632,3 +1503,49 @@ TEST_F(CommandStreamReceiverPageTableManagerTest, givenNonExisitingPageTableMana
EXPECT_EQ(supportsPageTableManager, commandStreamReceiver.needsPageTableManager());
}
// With EnableBlitterOperationsSupport set to 0, re-creating the work partition
// allocation is expected to go through MemoryManager::copyMemoryToAllocationBanks
// once per sub-device, each call copying sizeof(uint32_t) bytes into the work
// partition allocation.
TEST(CreateWorkPartitionAllocationTest, givenDisabledBlitterWhenInitializingWorkPartitionAllocationThenFallbackToCpuCopy) {
    DebugManagerStateRestore restore{};
    VariableBackup<bool> backup(&ImplicitScaling::apiSupport, true);

    // NOTE(review): presumably {1, 2} means one root device with two sub-devices;
    // the assertions below rely on rootDevices[0] and subDevices[0..1] existing.
    UltDeviceFactory deviceFactory{1, 2};
    MockDevice &device = *deviceFactory.rootDevices[0];
    auto memoryManager = static_cast<MockMemoryManager *>(device.getMemoryManager());
    auto commandStreamReceiver = device.getDefaultEngine().commandStreamReceiver;

    // Release the allocation that already exists so the call under test creates a fresh one.
    memoryManager->freeGraphicsMemory(commandStreamReceiver->getWorkPartitionAllocation());

    DebugManager.flags.EnableBlitterOperationsSupport.set(0);

    // Reset the mock's bookkeeping so only the call under test is counted.
    memoryManager->copyMemoryToAllocationBanksCalled = 0u;
    memoryManager->copyMemoryToAllocationBanksParamsPassed.clear();

    auto retVal = commandStreamReceiver->createWorkPartitionAllocation(device);
    EXPECT_TRUE(retVal);

    // Fatal check: the recorded parameters are indexed below, so stop here when the
    // expected number of copies was not recorded.
    ASSERT_EQ(2u, memoryManager->copyMemoryToAllocationBanksCalled);

    for (auto bank = 0u; bank < 2u; bank++) {
        const auto &params = memoryManager->copyMemoryToAllocationBanksParamsPassed[bank];
        EXPECT_EQ(deviceFactory.subDevices[bank]->getDeviceBitfield(), params.handleMask);
        EXPECT_EQ(commandStreamReceiver->getWorkPartitionAllocation(), params.graphicsAllocation);
        EXPECT_EQ(sizeof(uint32_t), params.sizeToCopy);
        EXPECT_NE(nullptr, params.memoryToCopy);
    }
}
// When the blitter is reported as supported, re-creating the work partition
// allocation must not record any copyMemoryToAllocationBanks call on the mock
// memory manager.
TEST(CreateWorkPartitionAllocationTest, givenEnabledBlitterWhenInitializingWorkPartitionAllocationThenDontCopyOnCpu) {
    DebugManagerStateRestore dbgRestore{};
    VariableBackup<bool> apiSupportBackup(&ImplicitScaling::apiSupport, true);

    UltDeviceFactory factory{1, 2};
    MockDevice &device = *factory.rootDevices[0];
    auto mockMemoryManager = static_cast<MockMemoryManager *>(device.getMemoryManager());
    auto csr = device.getDefaultEngine().commandStreamReceiver;

    // Mark the blitter as supported in the capability table, then skip if the
    // blitter requirement is still not met for this hardware info.
    auto mutableHwInfo = device.getRootDeviceEnvironmentRef().getMutableHardwareInfo();
    mutableHwInfo->capabilityTable.blitterOperationsSupported = true;
    REQUIRE_BLITTER_OR_SKIP(&device.getHardwareInfo());

    // Drop the existing allocation and clear the mock's counters before the call under test.
    mockMemoryManager->freeGraphicsMemory(csr->getWorkPartitionAllocation());
    mockMemoryManager->copyMemoryToAllocationBanksCalled = 0u;
    mockMemoryManager->copyMemoryToAllocationBanksParamsPassed.clear();

    EXPECT_TRUE(csr->createWorkPartitionAllocation(device));
    EXPECT_EQ(0u, mockMemoryManager->copyMemoryToAllocationBanksCalled);
}