performance: Memory handling improvements

By default, prefer allocating memory through KMD first, instead of through malloc first.

By default, prefer not caching allocations on MTL devices. This results
in allocations being handled with a non-coherent PAT index.

For integrated devices, when caching is not preferred, do not allow
direct memory access in the CPU domain. For map/unmap operations, create
a dedicated memory allocation for CPU access instead of accessing the memory
directly, reusing the same logic as when mapping/unmapping local memory.

Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2023-07-19 17:13:58 +00:00
committed by Compute-Runtime-Automation
parent 1d07d999c8
commit 5b80bd4d7c
37 changed files with 175 additions and 18 deletions

View File

@@ -299,6 +299,8 @@ int main(int argc, char **argv) {
revId = platform.usRevId;
}
NEO::DebugManager.flags.ForcePreferredAllocationMethod.set(0); // Force allocation in system for ULTs
adjustHwInfoForTests(hwInfoForTests, euPerSubSlice, sliceCount, subSlicePerSliceCount, dieRecovery);
// Platforms with uninitialized factory are not supported

View File

@@ -750,6 +750,10 @@ size_t Buffer::calculateHostPtrSize(const size_t *origin, const size_t *region,
}
bool Buffer::isReadWriteOnCpuAllowed(const Device &device) {
if (!device.getProductHelper().isCachingOnCpuAvailable()) {
return false;
}
if (forceDisallowCPUCopy) {
return false;
}

View File

@@ -7,12 +7,15 @@
#include "opencl/source/mem_obj/mem_obj.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/gmm_helper/resource_info.h"
#include "shared/source/helpers/bit_helpers.h"
#include "shared/source/helpers/get_info.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/product_helper.h"
#include "shared/source/utilities/buffer_pool_allocator.inl"
#include "shared/source/utilities/heap_allocator.h"
@@ -429,8 +432,10 @@ bool MemObj::isTiledAllocation() const {
// Reports whether this memory object may be mapped for direct CPU access.
// All of the following must hold for the default graphics allocation:
//  - the object is not tiled, has no sharing handler and is not mip-mapped,
//  - the DisableZeroCopyForBuffers debug flag is not set,
//  - compression is not enabled on the allocation,
//  - the allocation lives in a system memory pool,
//  - the product helper of the allocation's root device reports that caching on CPU is available.
// The last condition has to be part of the returned expression; as a separate
// statement after the return it would never be evaluated.
bool MemObj::mappingOnCpuAllowed() const {
    auto graphicsAllocation = multiGraphicsAllocation.getDefaultGraphicsAllocation();
    auto &productHelper = this->executionEnvironment->rootDeviceEnvironments[graphicsAllocation->getRootDeviceIndex()]->getProductHelper();
    return !isTiledAllocation() && !peekSharingHandler() && !isMipMapped(this) && !DebugManager.flags.DisableZeroCopyForBuffers.get() &&
           !graphicsAllocation->isCompressionEnabled() && MemoryPoolHelper::isSystemMemoryPool(graphicsAllocation->getMemoryPool()) &&
           productHelper.isCachingOnCpuAvailable();
}
void MemObj::storeProperties(const cl_mem_properties *properties) {

View File

@@ -54,6 +54,8 @@ TEST_F(ClEnqueueMapBufferTests, GivenNullCommandQueueWhenMappingBufferThenInvali
}
TEST_F(ClEnqueueMapBufferTests, GivenValidParametersWhenMappingBufferThenSuccessIsReturned) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pContext->getDevice(0)->getRootDeviceEnvironment());
unsigned int bufferSize = 16;
auto pHostMem = new unsigned char[bufferSize];
memset(pHostMem, 0xaa, bufferSize);
@@ -111,6 +113,8 @@ TEST_F(ClEnqueueMapBufferTests, GivenQueueIncapableWhenMappingBufferThenInvalidO
}
TEST_F(ClEnqueueMapBufferTests, GivenMappedPointerWhenCreatingBufferFromThisPointerThenInvalidHostPtrErrorIsReturned) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pContext->getDevice(0)->getRootDeviceEnvironment());
unsigned int bufferSize = 16;
cl_mem buffer = clCreateBuffer(pContext, CL_MEM_READ_WRITE, bufferSize, nullptr, &retVal);

View File

@@ -53,6 +53,8 @@ TEST_F(ClEnqueueUnmapMemObjTests, GivenQueueIncapableWhenUnmappingBufferThenInva
}
TEST_F(ClEnqueueUnmapMemObjTests, givenInvalidAddressWhenUnmappingOnCpuThenReturnError) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pContext->getDevice(0)->getRootDeviceEnvironment());
auto buffer = std::unique_ptr<Buffer>(BufferHelper<BufferUseHostPtr<>>::create(pContext));
EXPECT_TRUE(buffer->mappingOnCpuAllowed());
cl_int retVal = CL_SUCCESS;

View File

@@ -15,6 +15,7 @@
#include "shared/test/common/mocks/mock_os_library.h"
#include "shared/test/common/mocks/mock_source_level_debugger.h"
#include "shared/test/common/mocks/mock_timestamp_container.h"
#include "shared/test/common/test_macros/test_checks_shared.h"
#include "shared/test/common/utilities/base_object_utils.h"
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"
@@ -430,6 +431,8 @@ HWTEST_F(CommandQueueHwTest, GivenNonEmptyQueueOnBlockingWhenMappingBufferThenWi
bool finishWasCalled;
};
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(context->getDevice(0)->getRootDeviceEnvironment());
MockCmdQ cmdQ(context, pCmdQ->getDevice().getSpecializedDevice<ClDevice>());
auto b1 = clCreateBuffer(context, CL_MEM_READ_WRITE, 20, nullptr, nullptr);
@@ -476,6 +479,8 @@ HWTEST_F(CommandQueueHwTest, GivenEventsWaitlistOnBlockingWhenMappingBufferThenW
uint32_t updateCountBeforeCompleted;
};
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(context->getDevice(0)->getRootDeviceEnvironment());
MockEvent *me = new MockEvent(context, 1024);
auto b1 = clCreateBuffer(context, CL_MEM_READ_WRITE, 20, nullptr, nullptr);
cl_event meAsClEv = me;

View File

@@ -265,6 +265,8 @@ TEST_F(EnqueueMapBufferTest, GivenValidArgsWhenMappingBufferThenSuccessIsReturne
}
HWTEST_F(EnqueueMapBufferTest, givenNonBlockingReadOnlyMapBufferOnZeroCopyBufferWhenItIsCalledThenSynchronizationIsNotMadeUntilWaitForEvents) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableAsyncEventsHandler.set(false);
cl_event mapEventReturned = nullptr;
@@ -463,6 +465,8 @@ TEST_F(EnqueueMapBufferTest, givenReadOnlyBufferWhenMappedOnGpuThenSetValidEvent
}
TEST_F(EnqueueMapBufferTest, givenNonBlockingMapBufferAfterL3IsAlreadyFlushedThenEventIsSignaledAsCompleted) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
cl_event eventReturned = nullptr;
uint32_t tagHW = 0;
*pTagMemory = tagHW;
@@ -530,6 +534,8 @@ TEST_F(EnqueueMapBufferTest, givenNonBlockingMapBufferAfterL3IsAlreadyFlushedThe
}
HWTEST_F(EnqueueMapBufferTest, GivenBufferThatIsNotZeroCopyWhenNonBlockingMapIsCalledThenFinishIsCalledAndDataTransferred) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
const auto bufferSize = 100;
auto localSize = bufferSize;
char misaligned[bufferSize] = {1};
@@ -611,6 +617,8 @@ TEST_F(EnqueueMapBufferTest, GivenWrongMemObjectWhenMapIsCalledThenInvalidMemObj
}
HWTEST_F(EnqueueMapBufferTest, GivenPtrToReturnEventWhenMappingBufferThenEventIsNotNull) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
cl_event eventReturned = NULL;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
@@ -658,6 +666,8 @@ HWTEST_F(EnqueueMapBufferTest, GivenPtrToReturnEventWhenMappingBufferThenEventIs
}
TEST_F(EnqueueMapBufferTest, GivenZeroCopyBufferWhenMapBufferWithoutEventsThenCommandStreamReceiverUpdatesRequiredDCFlushCount) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver();
auto buffer = clCreateBuffer(
@@ -691,6 +701,8 @@ TEST_F(EnqueueMapBufferTest, GivenZeroCopyBufferWhenMapBufferWithoutEventsThenCo
}
TEST_F(EnqueueMapBufferTest, givenBufferWithoutUseHostPtrFlagWhenMappedOnCpuThenSetAllMapParams) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
std::unique_ptr<Buffer> buffer(Buffer::create(BufferDefaults::context, CL_MEM_READ_WRITE, 10, nullptr, retVal));
EXPECT_NE(nullptr, buffer);
EXPECT_TRUE(buffer->mappingOnCpuAllowed());
@@ -720,6 +732,8 @@ TEST_F(EnqueueMapBufferTest, givenBufferWithoutUseHostPtrFlagWhenMappedOnCpuThen
}
TEST_F(EnqueueMapBufferTest, givenBufferWithUseHostPtrFlagWhenMappedOnCpuThenSetAllMapParams) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
uint8_t hostPtr[10] = {};
std::unique_ptr<Buffer> buffer(Buffer::create(BufferDefaults::context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, 10, hostPtr, retVal));
EXPECT_NE(nullptr, buffer);
@@ -750,6 +764,8 @@ TEST_F(EnqueueMapBufferTest, givenBufferWithUseHostPtrFlagWhenMappedOnCpuThenSet
}
HWTEST_F(EnqueueMapBufferTest, givenMapBufferOnGpuWhenMappingBufferThenStoreGraphicsAllocationInMapInfo) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
uint8_t hostPtr[10] = {};
std::unique_ptr<Buffer> bufferForCpuMap(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, 10, hostPtr, retVal));
ASSERT_NE(nullptr, bufferForCpuMap);

View File

@@ -496,6 +496,7 @@ HWTEST_F(EnqueueMapImageTest, GivenPtrToReturnEventWhenMappingImageThenEventIsNo
}
HWTEST_F(EnqueueMapImageTest, givenZeroCopyImageWhenItIsMappedAndReturnsEventThenEventHasCorrectProperties) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pDevice->getRootDeviceEnvironment());
cl_event eventReturned = nullptr;
auto mapFlags = CL_MAP_READ;
const size_t origin[3] = {0, 0, 0};
@@ -647,6 +648,7 @@ HWTEST_F(EnqueueMapImageTest, givenSharingHandlerWhenReadOnlyMapAndUnmapOnNonTil
}
HWTEST_F(EnqueueMapImageTest, givenImageWithouUsetHostPtrFlagWhenMappedOnCpuThenSetAllMapProperties) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pDevice->getRootDeviceEnvironment());
std::unique_ptr<Image> image(ImageHelper<Image1dDefaults>::create(context));
ASSERT_NE(nullptr, image);
EXPECT_TRUE(image->mappingOnCpuAllowed());
@@ -675,6 +677,7 @@ HWTEST_F(EnqueueMapImageTest, givenImageWithouUsetHostPtrFlagWhenMappedOnCpuThen
}
HWTEST_F(EnqueueMapImageTest, givenImageWithUseHostPtrFlagWhenMappedOnCpuThenSetAllMapProperties) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pDevice->getRootDeviceEnvironment());
std::unique_ptr<Image> image(ImageHelper<ImageUseHostPtr<Image1dDefaults>>::create(context));
ASSERT_NE(nullptr, image);
EXPECT_TRUE(image->mappingOnCpuAllowed());
@@ -786,6 +789,7 @@ TEST_F(EnqueueMapImageTest, givenBlockedCommandQueueWhenBlockingMapWith1DImageIs
}
TEST_F(EnqueueMapImageTest, givenBlockedCommandQueueWhenBlockingCpuMapIsCalledThenReturnRowPitchAndSlicePitch) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pDevice->getRootDeviceEnvironment());
const size_t origin[3] = {0, 0, 0};
const size_t region[3] = {1, 1, 1};
size_t retImageRowPitch = 0;
@@ -823,6 +827,7 @@ TEST_F(EnqueueMapImageTest, givenBlockedCommandQueueWhenBlockingCpuMapIsCalledTh
}
TEST_F(EnqueueMapImageTest, givenZeroCopyImageWhenMappedOnCpuThenReturnImageRowAndSlicePitch) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pDevice->getRootDeviceEnvironment());
const size_t origin[3] = {0, 0, 0};
const size_t region[3] = {1, 1, 1};
size_t retImageRowPitch = 0;
@@ -842,6 +847,7 @@ TEST_F(EnqueueMapImageTest, givenZeroCopyImageWhenMappedOnCpuThenReturnImageRowA
}
TEST_F(EnqueueMapImageTest, givenNonZeroCopyImageWhenMappedOnCpuThenReturnHostRowAndSlicePitch) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pDevice->getRootDeviceEnvironment());
const size_t origin[3] = {0, 0, 0};
const size_t region[3] = {1, 1, 1};
size_t retImageRowPitch = 0;
@@ -948,6 +954,8 @@ TEST_F(EnqueueMapImageTest, givenImage1DArrayWhenEnqueueMapImageIsCalledThenRetu
void transformImage3dTo2dArray(void *memory) override {}
};
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pDevice->getRootDeviceEnvironment());
const size_t origin[3] = {0, 0, 0};
const size_t region[3] = {1, 1, 1};
size_t retImageRowPitch = 0;

View File

@@ -102,6 +102,7 @@ TEST_F(EnqueueReadBuffer, WhenReadingBufferThenEventReturnedShouldBeMaxOfInputEv
delete pEvent;
}
TEST_F(EnqueueReadBuffer, givenInOrderQueueAndForcedCpuCopyOnReadBufferAndDstPtrEqualSrcPtrWithEventsNotBlockedWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
DebugManagerStateRestore dbgRestore;
DebugManager.flags.DoCpuCopyOnReadBuffer.set(1);
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::Default));
@@ -179,6 +180,7 @@ TEST_F(EnqueueReadBuffer, givenInOrderQueueAndForcedCpuCopyOnReadBufferAndDstPtr
}
TEST_F(EnqueueReadBuffer, givenOutOfOrderQueueAndForcedCpuCopyOnReadBufferAndDstPtrEqualSrcPtrWithEventsNotBlockedWhenReadBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
DebugManagerStateRestore dbgRestore;
DebugManager.flags.DoCpuCopyOnReadBuffer.set(1);
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::Default));

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -599,6 +599,7 @@ HWTEST_F(EnqueueReadBufferTypeTest, givenEnqueueReadBufferCalledWhenLockedPtrInT
}
HWTEST_F(EnqueueReadBufferTypeTest, givenForcedCpuCopyWhenEnqueueReadCompressedBufferThenDontCopyOnCpu) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
DebugManagerStateRestore dbgRestore;
DebugManager.flags.DoCpuCopyOnReadBuffer.set(1);
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::Default));

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -119,6 +119,7 @@ TEST_F(EnqueueUnmapMemObjTest, WhenUnmappingMemoryObjectThenReturnedEventHasGrea
}
HWTEST_F(EnqueueUnmapMemObjTest, WhenUnmappingMemoryObjectThenEventIsUpdated) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pDevice->getRootDeviceEnvironment());
cl_event eventReturned = NULL;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
@@ -142,6 +143,7 @@ HWTEST_F(EnqueueUnmapMemObjTest, WhenUnmappingMemoryObjectThenEventIsUpdated) {
}
TEST_F(EnqueueUnmapMemObjTest, WhenUnmappingMemoryObjectThenWaitEventIsUpdated) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pDevice->getRootDeviceEnvironment());
cl_event waitEvent = nullptr;
cl_event retEvent = nullptr;
@@ -196,6 +198,7 @@ TEST_F(EnqueueUnmapMemObjTest, WhenUnmappingMemoryObjectThenWaitEventIsUpdated)
}
HWTEST_F(EnqueueUnmapMemObjTest, givenEnqueueUnmapMemObjectWhenNonAubWritableBufferObjectMappedToHostPtrForWritingThenItShouldBeResetToAubAndTbxWritable) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pDevice->getRootDeviceEnvironment());
auto buffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
ASSERT_NE(nullptr, buffer);
auto graphicsAllocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
@@ -218,6 +221,7 @@ HWTEST_F(EnqueueUnmapMemObjTest, givenEnqueueUnmapMemObjectWhenNonAubWritableBuf
}
HWTEST_F(EnqueueUnmapMemObjTest, givenWriteBufferIsServicedOnCPUWhenBufferIsNonAubTbxWriteableThenFlagsChange) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pDevice->getRootDeviceEnvironment());
DebugManagerStateRestore restorer;
DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1);
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::Default));

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -102,6 +102,7 @@ TEST_F(EnqueueWriteBufferTypeTest, WhenWritingBufferThenReturnedEventShouldBeMax
}
TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndForcedCpuCopyOnWriteBufferAndDstPtrEqualSrcPtrWithEventsNotBlockedWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
DebugManagerStateRestore dbgRestore;
DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1);
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::Default));
@@ -217,6 +218,7 @@ TEST_F(EnqueueWriteBufferTypeTest, givenInOrderQueueAndForcedCpuCopyOnWriteBuffe
}
TEST_F(EnqueueWriteBufferTypeTest, givenOutOfOrderQueueAndForcedCpuCopyOnWriteBufferAndDstPtrEqualSrcPtrWithEventsWhenWriteBufferIsExecutedThenTaskLevelShouldNotBeIncreased) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
DebugManagerStateRestore dbgRestore;
DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1);
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::Default));

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -428,6 +428,7 @@ HWTEST_F(EnqueueWriteBufferTypeTest, givenEnqueueWriteBufferCalledWhenLockedPtrI
}
HWTEST_F(EnqueueWriteBufferTypeTest, givenForcedCpuCopyWhenEnqueueWriteCompressedBufferThenDontCopyOnCpu) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
DebugManagerStateRestore dbgRestore;
DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1);
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::Default));

View File

@@ -8,6 +8,7 @@
#include "shared/test/common/mocks/mock_allocation_properties.h"
#include "shared/test/common/mocks/mock_gmm.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/test_macros/test_checks_shared.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/event/user_event.h"
@@ -229,6 +230,7 @@ HWTEST_F(MultipleMapBufferTest, givenErrorFromWriteBufferWhenUnmappedOnGpuThenDo
}
HWTEST_F(MultipleMapBufferTest, givenUnblockedQueueWhenMappedOnCpuThenAddMappedPtrAndRemoveOnUnmap) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
auto buffer = createMockBuffer<FamilyType>(false);
auto cmdQ = createMockCmdQ<FamilyType>();
EXPECT_TRUE(buffer->mappingOnCpuAllowed());
@@ -250,6 +252,7 @@ HWTEST_F(MultipleMapBufferTest, givenUnblockedQueueWhenMappedOnCpuThenAddMappedP
}
HWTEST_F(MultipleMapBufferTest, givenUnblockedQueueWhenReadOnlyMappedOnCpuThenDontMakeCpuCopy) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
auto buffer = createMockBuffer<FamilyType>(false);
auto cmdQ = createMockCmdQ<FamilyType>();
EXPECT_TRUE(buffer->mappingOnCpuAllowed());
@@ -267,6 +270,7 @@ HWTEST_F(MultipleMapBufferTest, givenUnblockedQueueWhenReadOnlyMappedOnCpuThenDo
}
HWTEST_F(MultipleMapBufferTest, givenUnblockedQueueWhenWriteInvalidateMappedOnCpuThenDontMakeCpuCopy) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
auto buffer = createMockBuffer<FamilyType>(false);
auto cmdQ = createMockCmdQ<FamilyType>();
EXPECT_TRUE(buffer->mappingOnCpuAllowed());
@@ -284,6 +288,7 @@ HWTEST_F(MultipleMapBufferTest, givenUnblockedQueueWhenWriteInvalidateMappedOnCp
}
HWTEST_F(MultipleMapBufferTest, givenBlockedQueueWhenMappedOnCpuThenAddMappedPtrAndRemoveOnUnmap) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
auto buffer = createMockBuffer<FamilyType>(false);
auto cmdQ = createMockCmdQ<FamilyType>();
EXPECT_TRUE(buffer->mappingOnCpuAllowed());
@@ -311,6 +316,7 @@ HWTEST_F(MultipleMapBufferTest, givenBlockedQueueWhenMappedOnCpuThenAddMappedPtr
}
HWTEST_F(MultipleMapBufferTest, givenBlockedQueueWhenMappedReadOnlyOnCpuThenDontMakeCpuCopy) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
auto buffer = createMockBuffer<FamilyType>(false);
auto cmdQ = createMockCmdQ<FamilyType>();
EXPECT_TRUE(buffer->mappingOnCpuAllowed());
@@ -334,6 +340,7 @@ HWTEST_F(MultipleMapBufferTest, givenBlockedQueueWhenMappedReadOnlyOnCpuThenDont
}
HWTEST_F(MultipleMapBufferTest, givenInvalidPtrWhenUnmappedOnCpuThenReturnError) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
auto buffer = createMockBuffer<FamilyType>(false);
auto cmdQ = createMockCmdQ<FamilyType>();
EXPECT_TRUE(buffer->mappingOnCpuAllowed());
@@ -401,6 +408,7 @@ HWTEST_F(MultipleMapBufferTest, givenOverlapingPtrWhenMappingOnGpuForWriteThenRe
}
HWTEST_F(MultipleMapBufferTest, givenOverlapingPtrWhenMappingOnCpuForWriteThenReturnError) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
auto buffer = createMockBuffer<FamilyType>(false);
auto cmdQ = createMockCmdQ<FamilyType>();
EXPECT_TRUE(buffer->mappingOnCpuAllowed());
@@ -418,3 +426,9 @@ HWTEST_F(MultipleMapBufferTest, givenOverlapingPtrWhenMappingOnCpuForWriteThenRe
EXPECT_EQ(CL_INVALID_OPERATION, retVal);
EXPECT_EQ(1u, buffer->getMapOperationsHandler().size());
}
HWTEST_F(MultipleMapBufferTest, givenCachingOnCpuUnavailableWhenMappingOnCpuAllowedIsCalledThenReturnFalse) {
    // The buffer's CPU-mapping permission is expected to equal whatever the
    // product helper reports for CPU caching on the tested device.
    auto mockBuffer = createMockBuffer<FamilyType>(false);
    auto &helper = pClDevice->getRootDeviceEnvironment().getHelper<ProductHelper>();

    const auto cachingOnCpuAvailable = helper.isCachingOnCpuAvailable();
    const auto cpuMappingAllowed = mockBuffer->mappingOnCpuAllowed();
    EXPECT_EQ(cachingOnCpuAvailable, cpuMappingAllowed);
}

View File

@@ -8,6 +8,7 @@
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/test_macros/test.h"
#include "shared/test/common/test_macros/test_checks_shared.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/event/user_event.h"
@@ -265,6 +266,7 @@ HWTEST_F(MultipleMapImageTest, givenErrorFromWriteImageWhenUnmappedOnGpuThenDont
}
HWTEST_F(MultipleMapImageTest, givenUnblockedQueueWhenMappedOnCpuThenAddMappedPtrAndRemoveOnUnmap) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
auto image = createMockImage<Image1dDefaults, FamilyType>();
auto cmdQ = createMockCmdQ<FamilyType>();
image->isZeroCopy = false;
@@ -287,6 +289,7 @@ HWTEST_F(MultipleMapImageTest, givenUnblockedQueueWhenMappedOnCpuThenAddMappedPt
}
HWTEST_F(MultipleMapImageTest, givenUnblockedQueueWhenReadOnlyUnmappedOnCpuThenDontMakeCpuCopy) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
auto image = createMockImage<Image1dDefaults, FamilyType>();
auto cmdQ = createMockCmdQ<FamilyType>();
image->isZeroCopy = false;
@@ -308,6 +311,7 @@ HWTEST_F(MultipleMapImageTest, givenUnblockedQueueWhenReadOnlyUnmappedOnCpuThenD
}
HWTEST_F(MultipleMapImageTest, givenUnblockedQueueWhenWriteInvalidateMappedOnCpuThenDontMakeCpuCopy) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
auto image = createMockImage<Image1dDefaults, FamilyType>();
auto cmdQ = createMockCmdQ<FamilyType>();
image->isZeroCopy = false;
@@ -329,6 +333,7 @@ HWTEST_F(MultipleMapImageTest, givenUnblockedQueueWhenWriteInvalidateMappedOnCpu
}
HWTEST_F(MultipleMapImageTest, givenBlockedQueueWhenMappedOnCpuThenAddMappedPtrAndRemoveOnUnmap) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
auto image = createMockImage<Image1dDefaults, FamilyType>();
auto cmdQ = createMockCmdQ<FamilyType>();
image->isZeroCopy = false;
@@ -358,6 +363,7 @@ HWTEST_F(MultipleMapImageTest, givenBlockedQueueWhenMappedOnCpuThenAddMappedPtrA
}
HWTEST_F(MultipleMapImageTest, givenBlockedQueueWhenMappedReadOnlyOnCpuThenDontMakeCpuCopy) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
auto image = createMockImage<Image1dDefaults, FamilyType>();
auto cmdQ = createMockCmdQ<FamilyType>();
image->isZeroCopy = false;
@@ -385,6 +391,7 @@ HWTEST_F(MultipleMapImageTest, givenBlockedQueueWhenMappedReadOnlyOnCpuThenDontM
}
HWTEST_F(MultipleMapImageTest, givenInvalidPtrWhenUnmappedOnCpuThenReturnError) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
auto image = createMockImage<Image1dDefaults, FamilyType>();
auto cmdQ = createMockCmdQ<FamilyType>();
EXPECT_TRUE(image->mappingOnCpuAllowed());
@@ -453,6 +460,7 @@ HWTEST_F(MultipleMapImageTest, givenOverlapingPtrWhenMappingForWriteThenReturnEr
}
HWTEST_F(MultipleMapImageTest, givenOverlapingPtrWhenMappingOnCpuForWriteThenReturnError) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
auto image = createMockImage<Image1dDefaults, FamilyType>();
auto cmdQ = createMockCmdQ<FamilyType>();
EXPECT_TRUE(image->mappingOnCpuAllowed());

View File

@@ -11,6 +11,7 @@
#include "shared/source/memory_manager/memory_allocation.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/test_macros/test.h"
#include "shared/test/common/test_macros/test_checks_shared.h"
#include "opencl/test/unit_test/command_queue/enqueue_read_buffer_fixture.h"
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
@@ -20,6 +21,7 @@ using namespace NEO;
typedef EnqueueReadBufferTypeTest ReadWriteBufferCpuCopyTest;
HWTEST_F(ReadWriteBufferCpuCopyTest, givenCompressedGmmWhenAskingForCpuOperationThenDisallow) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(context->getDevice(0)->getRootDeviceEnvironment());
DebugManagerStateRestore restorer;
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::Default));
cl_int retVal;
@@ -44,6 +46,7 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, givenCompressedGmmWhenAskingForCpuOperation
}
HWTEST_F(ReadWriteBufferCpuCopyTest, GivenUnalignedReadPtrWhenReadingBufferThenMemoryIsReadCorrectly) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(context->getDevice(0)->getRootDeviceEnvironment());
DebugManagerStateRestore restorer;
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::Default));
cl_int retVal;
@@ -85,6 +88,7 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenUnalignedReadPtrWhenReadingBufferThenM
}
HWTEST_F(ReadWriteBufferCpuCopyTest, GivenUnalignedSrcPtrWhenWritingBufferThenMemoryIsWrittenCorrectly) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(context->getDevice(0)->getRootDeviceEnvironment());
DebugManagerStateRestore restorer;
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::Default));
cl_int retVal;
@@ -226,6 +230,7 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenSpecificMemoryStructuresWhenReadingWri
}
HWTEST_F(ReadWriteBufferCpuCopyTest, givenDebugVariableToDisableCpuCopiesWhenBufferCpuCopyAllowedIsCalledThenItReturnsFalse) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(context->getDevice(0)->getRootDeviceEnvironment());
DebugManagerStateRestore restorer;
DebugManager.flags.EnableLocalMemory.set(false);
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::Default));
@@ -252,6 +257,13 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, givenDebugVariableToDisableCpuCopiesWhenBuf
EXPECT_FALSE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_WRITE_BUFFER, true, MemoryConstants::pageSize, reinterpret_cast<void *>(0x1000), 0u, nullptr));
}
HWTEST_F(ReadWriteBufferCpuCopyTest, givenCachingOnCpuUnavailableWhenIsReadWriteOnCpuAllowedThenReturnFalse) {
    // A CL_MEM_ALLOC_HOST_PTR buffer of one page is created; whether CPU
    // read/write is allowed on it is expected to equal the product helper's
    // answer for CPU caching availability.
    cl_int retVal;
    auto hostBuffer = std::unique_ptr<Buffer>(Buffer::create(context, CL_MEM_ALLOC_HOST_PTR, MemoryConstants::pageSize, nullptr, retVal));
    auto &helper = context->getDevice(0)->getRootDeviceEnvironment().getHelper<ProductHelper>();

    const auto &device = context->getDevice(0)->getDevice();
    const auto cachingOnCpuAvailable = helper.isCachingOnCpuAvailable();
    const auto readWriteOnCpuAllowed = hostBuffer->isReadWriteOnCpuAllowed(device);
    EXPECT_EQ(cachingOnCpuAvailable, readWriteOnCpuAllowed);
}
TEST(ReadWriteBufferOnCpu, givenNoHostPtrAndAlignedSizeWhenMemoryAllocationIsInNonSystemMemoryPoolThenIsReadWriteOnCpuAllowedReturnsFalse) {
DebugManagerStateRestore restorer;
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::Default));
@@ -259,6 +271,7 @@ TEST(ReadWriteBufferOnCpu, givenNoHostPtrAndAlignedSizeWhenMemoryAllocationIsInN
MockMemoryManager *memoryManager = new MockMemoryManager(*executionEnvironment);
executionEnvironment->memoryManager.reset(memoryManager);
auto device = std::make_unique<MockClDevice>(MockDevice::createWithExecutionEnvironment<MockDevice>(nullptr, executionEnvironment, 0u));
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(device->getRootDeviceEnvironment());
MockContext ctx(device.get());
@@ -283,6 +296,7 @@ TEST(ReadWriteBufferOnCpu, givenPointerThatRequiresCpuCopyWhenCpuCopyIsEvaluated
MockMemoryManager *memoryManager = new MockMemoryManager(*executionEnvironment);
executionEnvironment->memoryManager.reset(memoryManager);
auto device = std::make_unique<MockClDevice>(MockDevice::createWithExecutionEnvironment<MockDevice>(nullptr, executionEnvironment, 0u));
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(device->getRootDeviceEnvironment());
MockContext context(device.get());

View File

@@ -22,6 +22,7 @@
#include "shared/test/common/mocks/mock_gmm_page_table_mngr.h"
#include "shared/test/common/mocks/mock_submissions_aggregator.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/test_macros/test_checks_shared.h"
#include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h"
#include "opencl/test/unit_test/mocks/mock_buffer.h"
@@ -227,6 +228,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenNonDcFlushWithInitialTaskCoun
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenDcFlushWhenFinishingThenTaskCountIncremented) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(pClDevice->getRootDeviceEnvironment());
MockContext ctx(pClDevice);
MockKernelWithInternals kernel(*pClDevice);
MockCommandQueueHw<FamilyType> mockCmdQueue(&ctx, pClDevice, nullptr);

View File

@@ -8,6 +8,7 @@
#include "shared/source/helpers/local_work_size.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/test_macros/test_checks_shared.h"
#include "opencl/source/command_queue/cl_local_work_size.h"
#include "opencl/source/command_queue/command_queue.h"
@@ -451,6 +452,7 @@ TEST_F(PerformanceHintEnqueueImageTest, GivenNonBlockingReadImageSharesStorageWi
}
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueMapBufferIsCallingThenContextProvidesProperHint) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(context->getDevice(0)->getRootDeviceEnvironment());
Buffer *buffer;
void *address;
@@ -475,6 +477,7 @@ TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueMapBufferIsCal
delete buffer;
}
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagAndBlockingEventWhenEnqueueMapBufferIsCallingThenContextProvidesProperHint) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(context->getDevice(0)->getRootDeviceEnvironment());
void *address;
bool zeroCopyBuffer = GetParam();
@@ -505,6 +508,7 @@ TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagAndBlockingEventWhenEnque
}
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueMapImageIsCallingThenContextProvidesProperHint) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(context->getDevice(0)->getRootDeviceEnvironment());
Image *image;
bool isZeroCopyImage;
@@ -543,6 +547,7 @@ TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueMapImageIsCall
}
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagAndBlockingEventWhenEnqueueMapImageIsCallingThenContextProvidesProperHint) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(context->getDevice(0)->getRootDeviceEnvironment());
auto image = std::unique_ptr<Image>(ImageHelper<ImageReadOnly<Image1dDefaults>>::create(context));
bool isZeroCopyImage = GetParam();
@@ -581,6 +586,7 @@ TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagAndBlockingEventWhenEnque
}
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCallingWithBufferThenContextProvidesProperHint) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(context->getDevice(0)->getRootDeviceEnvironment());
Buffer *buffer;
void *address;
@@ -607,6 +613,7 @@ TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCalling
}
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyAndBlockedEventFlagWhenEnqueueUnmapIsCallingWithBufferThenContextProvidesProperHint) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(context->getDevice(0)->getRootDeviceEnvironment());
void *address;
bool zeroCopyBuffer = GetParam();
@@ -638,6 +645,7 @@ TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyAndBlockedEventFlagWhenEnqueu
}
TEST_P(PerformanceHintEnqueueMapTest, GivenZeroCopyFlagWhenEnqueueUnmapIsCallingWithImageThenContextProvidesProperHint) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(context->getDevice(0)->getRootDeviceEnvironment());
Image *image;
bool isZeroCopyImage;

View File

@@ -1488,6 +1488,7 @@ TEST_F(EventTest, givenCmdQueueWithoutProfilingWhenIsCpuProfilingIsCalledThenFal
}
TEST_F(EventTest, givenOutEventWhenBlockingEnqueueHandledOnCpuThenUpdateTaskCountAndFlushStampFromCmdQ) {
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(mockContext.getDevice(0)->getRootDeviceEnvironment());
std::unique_ptr<Image> image(ImageHelper<Image1dDefaults>::create(&mockContext));
EXPECT_TRUE(image->mappingOnCpuAllowed());

View File

@@ -276,6 +276,8 @@ int main(int argc, char **argv) {
revId = platform.usRevId;
}
NEO::DebugManager.flags.ForcePreferredAllocationMethod.set(0); // Force allocation in system for ULTs
adjustHwInfoForTests(hwInfoForTests, euPerSubSlice, sliceCount, subSlicePerSliceCount, dieRecovery);
binaryNameSuffix.append(hardwarePrefix[hwInfoForTests.platform.eProductFamily]);

View File

@@ -26,6 +26,7 @@
#include "shared/test/common/mocks/mock_host_ptr_manager.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/test_macros/test_checks_shared.h"
#include "opencl/extensions/public/cl_ext_private.h"
#include "opencl/source/command_queue/command_queue_hw.h"
@@ -77,6 +78,7 @@ TEST(Buffer, whenBufferAllocatedInLocalMemoryThenCpuCopyIsDisallowed) {
MockBuffer buffer(allocation);
UltDeviceFactory factory{1, 0};
auto &device = *factory.rootDevices[0];
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(factory.rootDevices[0]->getRootDeviceEnvironment());
allocation.memoryPool = MemoryPool::LocalMemory;
EXPECT_FALSE(buffer.isReadWriteOnCpuAllowed(device));

View File

@@ -1298,6 +1298,7 @@ TEST(ImageTest, givenImageWhenAskedForPtrOffsetForCpuMappingThenReturnCorrectVal
DebugManagerStateRestore restore;
DebugManager.flags.ForceLinearImages.set(true);
MockContext ctx;
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(ctx.getDevice(0)->getRootDeviceEnvironment());
std::unique_ptr<Image> image(ImageHelper<Image3dDefaults>::create(&ctx));
EXPECT_TRUE(image->mappingOnCpuAllowed());
@@ -1313,6 +1314,7 @@ TEST(ImageTest, givenImageWhenAskedForPtrOffsetForCpuMappingThenReturnCorrectVal
TEST(ImageTest, given1DArrayImageWhenAskedForPtrOffsetForMappingThenReturnCorrectValue) {
MockContext ctx;
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(ctx.getDevice(0)->getRootDeviceEnvironment());
std::unique_ptr<Image> image(ImageHelper<Image1dArrayDefaults>::create(&ctx));
MemObjOffsetArray origin = {{4, 5, 0}};
@@ -1345,6 +1347,7 @@ TEST(ImageTest, givenImageWhenAskedForPtrLengthForCpuMappingThenReturnCorrectVal
DebugManagerStateRestore restore;
DebugManager.flags.ForceLinearImages.set(true);
MockContext ctx;
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(ctx.getDevice(0)->getRootDeviceEnvironment());
std::unique_ptr<Image> image(ImageHelper<Image3dDefaults>::create(&ctx));
EXPECT_TRUE(image->mappingOnCpuAllowed());

View File

@@ -16,6 +16,7 @@
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/test_macros/test_checks_shared.h"
#include "opencl/source/helpers/cl_memory_properties_helpers.h"
#include "opencl/source/helpers/properties_helper.h"
@@ -301,15 +302,23 @@ TEST(MemObj, givenTiledObjectWhenAskedForCpuMappingThenReturnFalse) {
bool isTiledAllocation() const override { return true; }
};
MockContext context;
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(context.getDevice(0)->getRootDeviceEnvironment());
MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment());
context.memoryManager = &memoryManager;
auto allocation = memoryManager.allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), MemoryConstants::pageSize});
allocation->setDefaultGmm(new Gmm(context.getDevice(0)->getGmmHelper(), nullptr, MemoryConstants::pageSize, 0, GMM_RESOURCE_USAGE_OCL_BUFFER, false, {}, true));
auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(CL_MEM_COPY_HOST_PTR, 0, 0, &context.getDevice(0)->getDevice());
MyMemObj memObj(nullptr, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0,
MemoryConstants::pageSize, nullptr, nullptr, 0, true, false, false);
MyMemObj memObj(&context, CL_MEM_OBJECT_BUFFER, memoryProperties, CL_MEM_COPY_HOST_PTR, 0,
MemoryConstants::pageSize, allocation->getUnderlyingBuffer(), nullptr, GraphicsAllocationHelper::toMultiGraphicsAllocation(allocation), true, false, false);
EXPECT_FALSE(memObj.mappingOnCpuAllowed());
}
TEST(MemObj, givenCompressedGmmWhenAskingForMappingOnCpuThenDisallow) {
MockContext context;
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(context.getDevice(0)->getRootDeviceEnvironment());
MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment());
context.memoryManager = &memoryManager;
@@ -328,6 +337,7 @@ TEST(MemObj, givenCompressedGmmWhenAskingForMappingOnCpuThenDisallow) {
TEST(MemObj, givenDefaultWhenAskedForCpuMappingThenReturnTrue) {
MockContext context;
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(context.getDevice(0)->getRootDeviceEnvironment());
MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment());
context.memoryManager = &memoryManager;
@@ -344,6 +354,7 @@ TEST(MemObj, givenDefaultWhenAskedForCpuMappingThenReturnTrue) {
TEST(MemObj, givenNonCpuAccessibleMemoryWhenAskingForMappingOnCpuThenDisallow) {
MockContext context;
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(context.getDevice(0)->getRootDeviceEnvironment());
MockMemoryManager memoryManager(*context.getDevice(0)->getExecutionEnvironment());
context.memoryManager = &memoryManager;

View File

@@ -1130,6 +1130,7 @@ TEST(UnifiedSharedMemoryTransferCalls, givenHostUsmAllocationWhenPtrIsUsedForTra
DebugManager.flags.EnableLocalMemory.set(false);
DebugManager.flags.ForceLocalMemoryAccessMode.set(static_cast<int32_t>(LocalMemoryAccessMode::Default));
MockContext mockContext;
REQUIRE_CPU_MEM_ACCESS_OR_SKIP(mockContext.getDevice(0)->getRootDeviceEnvironment());
cl_context clContext = &mockContext;
if (mockContext.getDevice(0u)->getHardwareInfo().capabilityTable.supportsOcl21Features == false) {

View File

@@ -149,6 +149,8 @@ int main(int argc, char **argv) {
}
}
NEO::DebugManager.flags.ForcePreferredAllocationMethod.set(0); // Force allocation in system for ULTs
auto productConfigHelper = new ProductConfigHelper();
auto allEnabledDeviceConfigs = productConfigHelper->getDeviceAotInfo();

View File

@@ -504,7 +504,7 @@ DECLARE_DEBUG_VARIABLE(bool, SkipFlushingEventsOnGetStatusCalls, false, "When se
DECLARE_DEBUG_VARIABLE(bool, AllowUnrestrictedSize, false, "Allow allocating memory with greater size than MAX_MEM_ALLOC_SIZE")
DECLARE_DEBUG_VARIABLE(bool, ForceDefaultThreadArbitrationPolicyIfNotSpecified, false, "When executing kernel without thread arbitration hint specified, ensure the default setting is used")
DECLARE_DEBUG_VARIABLE(bool, ForceAllResourcesUncached, false, "When set, all memory operations for all resources are forced to UC. This overrides all caching-related debug variables and globally disables all caches")
DECLARE_DEBUG_VARIABLE(bool, EnableCpuCacheForResources, true, "When true, driver will set gmm flag cacheable related to caching on cpu, for resources where it is allowed")
DECLARE_DEBUG_VARIABLE(bool, EnableCpuCacheForResources, false, "When true, driver will set gmm flag cacheable related to caching on cpu, for resources where it is allowed")
DECLARE_DEBUG_VARIABLE(bool, EnableDebuggerMmapMemoryAccess, false, "Mmap used to access memory by debug api, valid only on Linux OS")
DECLARE_DEBUG_VARIABLE(bool, ForceDefaultGrfCompilationMode, false, "Adds build option -cl-intel-128-GRF-per-thread to force kernel compilation in Default-GRF mode")
DECLARE_DEBUG_VARIABLE(bool, ForceLargeGrfCompilationMode, false, "Adds build option -cl-intel-256-GRF-per-thread to force kernel compilation in Large-GRF mode")

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -9,7 +9,7 @@
namespace NEO {
const GfxMemoryAllocationMethod preferredAllocationMethod = GfxMemoryAllocationMethod::UseUmdSystemPtr;
const GfxMemoryAllocationMethod preferredAllocationMethod = GfxMemoryAllocationMethod::AllocateByKmd;
size_t WddmMemoryManager::getHugeGfxMemoryChunkSize(GfxMemoryAllocationMethod allocationMethod) const {
return 4 * MemoryConstants::gigaByte - MemoryConstants::pageSize64k;

View File

@@ -52,7 +52,7 @@ class MockWddmMemoryManager : public MemoryManagerCreate<WddmMemoryManager> {
return BaseClass::allocateGraphicsMemoryInDevicePool(allocationData, status);
}
size_t hugeGfxMemoryChunkSize = BaseClass::getHugeGfxMemoryChunkSize(preferredAllocationMethod);
size_t hugeGfxMemoryChunkSize = BaseClass::getHugeGfxMemoryChunkSize(MockWddmMemoryManager::getPreferredAllocationMethod());
// Mock override: reports the stored hugeGfxMemoryChunkSize member so tests can
// control the chunk size; the allocationMethod argument is not consulted.
size_t getHugeGfxMemoryChunkSize(GfxMemoryAllocationMethod allocationMethod) const override {
    return hugeGfxMemoryChunkSize;
}
MockWddmMemoryManager(ExecutionEnvironment &executionEnvironment) : MemoryManagerCreate(false, false, executionEnvironment) {

View File

@@ -532,7 +532,7 @@ ExitOnSubmissionMode = 0
ForceInOrderImmediateCmdListExecution = -1
ForceTlbFlush = -1
DebugSetMemoryDiagnosticsDelay = -1
EnableCpuCacheForResources = 1
EnableCpuCacheForResources = 0
OverrideHwIpVersion = -1
PrintGlobalTimestampInNs = 0
EnableDeviceStateVerification = -1

View File

@@ -55,4 +55,9 @@ bool TestChecks::supportsSvm(const std::unique_ptr<HardwareInfo> &pHardwareInfo)
}
// Device-pointer convenience overload: looks up the device's HardwareInfo and
// delegates the decision to the HardwareInfo-pointer overload of supportsSvm.
bool TestChecks::supportsSvm(const Device *pDevice) {
    const auto &hardwareInfo = pDevice->getHardwareInfo();
    return supportsSvm(&hardwareInfo);
}
bool TestChecks::supportsCpuMemAccess(const RootDeviceEnvironment &rootDeviceEnvironment) {
auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
return productHelper.isCachingOnCpuAvailable();
}

View File

@@ -18,6 +18,7 @@ struct RootDeviceEnvironment;
namespace TestChecks {
bool supportsBlitter(const RootDeviceEnvironment &rootDeviceEnvironment);
bool fullySupportsBlitter(const RootDeviceEnvironment &rootDeviceEnvironment);
bool supportsCpuMemAccess(const RootDeviceEnvironment &rootDeviceEnvironment);
bool supportsImages(const HardwareInfo &hardwareInfo);
bool supportsImages(const std::unique_ptr<HardwareInfo> &pHardwareInfo);
bool supportsSvm(const HardwareInfo *pHardwareInfo);
@@ -56,3 +57,8 @@ bool supportsSvm(const Device *pDevice);
if (NEO::TestChecks::supportsImages(param) == false) { \
GTEST_SKIP(); \
}
// Test guard: invokes GTEST_SKIP() for the current test when
// NEO::TestChecks::supportsCpuMemAccess() returns false for the given
// root device environment. Expands to a bare `if` statement, so use it
// as a standalone statement inside a test body.
#define REQUIRE_CPU_MEM_ACCESS_OR_SKIP(rootDeviceEnvironment) \
if (NEO::TestChecks::supportsCpuMemAccess(rootDeviceEnvironment) == false) { \
GTEST_SKIP(); \
}

View File

@@ -748,6 +748,9 @@ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenInitializeEngineIs
}
HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWithAubManagerWhenInitFileIsCalledThenMemTraceCommentWithDriverVersionIsPutIntoAubStream) {
DebugManagerStateRestore stateRestore;
DebugManager.flags.ForcePreferredAllocationMethod.set(-1);
auto mockAubManager = std::make_unique<MockAubManager>();
auto aubExecutionEnvironment = getEnvironment<AUBCommandStreamReceiverHw<FamilyType>>(false, true, true);
auto aubCsr = aubExecutionEnvironment->template getCsr<AUBCommandStreamReceiverHw<FamilyType>>();
@@ -986,6 +989,7 @@ HWTEST_F(AubFileStreamTests, givenGenerateAubFilePerProcessIdDebugFlagAndAubComm
HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWithAubManagerWhenInitFileIsCalledThenCommentWithNonDefaultFlagsAreAdded) {
DebugManagerStateRestore stateRestore;
DebugManager.flags.ForcePreferredAllocationMethod.set(-1);
DebugManager.flags.MakeAllBuffersResident.set(1);
DebugManager.flags.ZE_AFFINITY_MASK.set("non-default");

View File

@@ -192,6 +192,9 @@ TEST(ExecutionEnvironment, givenNeoCalEnabledWhenCreateExecutionEnvironmentThenS
{"UseKmdMigration", 0},
{"SplitBcsSize", 256}};
DebugManagerStateRestore restorer;
DebugManager.flags.ForcePreferredAllocationMethod.set(-1);
#undef DECLARE_DEBUG_VARIABLE
#define DECLARE_DEBUG_VARIABLE(dataType, variableName, defaultValue, description) \
EXPECT_EQ(defaultValue, DebugManager.flags.variableName.getRef());
@@ -202,7 +205,6 @@ TEST(ExecutionEnvironment, givenNeoCalEnabledWhenCreateExecutionEnvironmentThenS
#undef DECLARE_DEBUG_VARIABLE
DebugManagerStateRestore restorer;
DebugManager.flags.NEO_CAL_ENABLED.set(1);
ExecutionEnvironment exeEnv;

View File

@@ -304,6 +304,8 @@ int main(int argc, char **argv) {
revId = platform.usRevId;
}
NEO::DebugManager.flags.ForcePreferredAllocationMethod.set(0); // Force allocation in system for ULTs
adjustHwInfoForTests(hwInfoForTests, euPerSubSlice, sliceCount, subSlicePerSliceCount, dieRecovery);
binaryNameSuffix.append(hardwarePrefix[hwInfoForTests.platform.eProductFamily]);

View File

@@ -18,6 +18,7 @@
#include "shared/source/os_interface/windows/wddm/um_km_data_translator.h"
#include "shared/source/os_interface/windows/wddm/wddm.h"
#include "shared/source/os_interface/windows/wddm_memory_manager.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/mocks/mock_gmm.h"
@@ -627,6 +628,9 @@ TEST_F(WddmLinuxConfigureReduced48bitDeviceAddressSpaceTest, givenTwoSvmAddressS
}
TEST_F(WddmLinuxTest, givenRequestFor32bitAllocationWithoutPreexistingHostPtrWhenAllocatingThroughKmdIsPreferredThenAllocateThroughKmdAndLockAllocation) {
DebugManagerStateRestore stateRestore;
DebugManager.flags.ForcePreferredAllocationMethod.set(static_cast<int32_t>(GfxMemoryAllocationMethod::AllocateByKmd));
osEnvironment->gdi->reserveGpuVirtualAddress = reserveDeviceAddressSpaceMock;
osEnvironment->gdi->createAllocation2 = createAllocation2Mock;
osEnvironment->gdi->mapGpuVirtualAddress = mapGpuVirtualAddressMock;

View File

@@ -76,7 +76,7 @@ class MockAllocateGraphicsMemoryWithAlignmentWddm : public MemoryManagerCreate<W
// Mock override: always answers with the stored hugeGfxMemoryChunkSize member,
// independent of the requested allocationMethod.
size_t getHugeGfxMemoryChunkSize(GfxMemoryAllocationMethod allocationMethod) const override { return hugeGfxMemoryChunkSize; }
size_t hugeGfxMemoryChunkSize = WddmMemoryManager::getHugeGfxMemoryChunkSize(preferredAllocationMethod);
size_t hugeGfxMemoryChunkSize = WddmMemoryManager::getHugeGfxMemoryChunkSize(MockWddmMemoryManager::getPreferredAllocationMethod());
};
class WddmMemoryManagerTests : public ::testing::Test {
@@ -109,7 +109,7 @@ TEST_F(WddmMemoryManagerTests, GivenAllocDataWithSVMCPUSetWhenAllocateGraphicsMe
allocData.makeGPUVaDifferentThanCPUPtr = true;
memoryManager->allocateGraphicsMemoryWithAlignment(allocData);
if (preferredAllocationMethod == GfxMemoryAllocationMethod::AllocateByKmd) {
if (MockWddmMemoryManager::getPreferredAllocationMethod() == GfxMemoryAllocationMethod::AllocateByKmd) {
EXPECT_TRUE(memoryManager->allocateGraphicsMemoryUsingKmdAndMapItToCpuVACalled);
} else {
EXPECT_TRUE(memoryManager->allocateSystemMemoryAndCreateGraphicsAllocationFromItCalled);
@@ -230,6 +230,9 @@ class WddmMemoryManagerAllocPathTests : public ::testing::Test {
};
TEST_F(WddmMemoryManagerAllocPathTests, givenAllocateGraphicsMemoryUsingKmdAndMapItToCpuVAWhenPreferedAllocationMethodThenProperArgumentsAreSet) {
DebugManagerStateRestore stateRestore;
DebugManager.flags.ForcePreferredAllocationMethod.set(-1);
{
NEO::AllocationData allocData = {};
allocData.type = NEO::AllocationType::SVM_CPU;
@@ -237,7 +240,7 @@ TEST_F(WddmMemoryManagerAllocPathTests, givenAllocateGraphicsMemoryUsingKmdAndMa
allocData.makeGPUVaDifferentThanCPUPtr = true;
auto graphicsAllocation = memoryManager->allocateGraphicsMemoryUsingKmdAndMapItToCpuVA(allocData, false);
if (preferredAllocationMethod == GfxMemoryAllocationMethod::AllocateByKmd) {
if (preferredAllocationMethod == GfxMemoryAllocationMethod::AllocateByKmd && !is32bit) {
EXPECT_FALSE(memoryManager->mapGpuVirtualAddressWithCpuPtr);
} else {
EXPECT_TRUE(memoryManager->mapGpuVirtualAddressWithCpuPtr);
@@ -494,7 +497,7 @@ TEST_F(WddmMemoryManagerTests, givenTypeWhenCallIsStatelessAccessRequiredThenPro
TEST_F(WddmMemoryManagerTests, givenForcePreferredAllocationMethodFlagSetWhenGettingPreferredAllocationMethodThenValueFlagIsReturned) {
DebugManagerStateRestore restorer;
EXPECT_EQ(preferredAllocationMethod, MockWddmMemoryManager::getPreferredAllocationMethod());
EXPECT_EQ(preferredAllocationMethod, GfxMemoryAllocationMethod::AllocateByKmd);
for (const auto &allocationMethod : {GfxMemoryAllocationMethod::UseUmdSystemPtr, GfxMemoryAllocationMethod::AllocateByKmd}) {
DebugManager.flags.ForcePreferredAllocationMethod.set(static_cast<int32_t>(allocationMethod));
@@ -569,6 +572,7 @@ class WddmMemoryManagerSimpleTest : public ::testing::Test {
} else {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.ForcePreferredAllocationMethod.set(-1);
wddm->init();
wddm->mapGpuVaStatus = true;
VariableBackup<bool> restorer{&wddm->callBaseMapGpuVa, false};
@@ -1422,6 +1426,9 @@ TEST_F(WddmMemoryManagerSimpleTest, whenAlignmentRequirementExceedsPageSizeThenA
} callCount;
};
DebugManagerStateRestore stateRestore;
DebugManager.flags.ForcePreferredAllocationMethod.set(-1);
MockWddmMemoryManagerAllocateWithAlignment memoryManager(true, true, executionEnvironment);
AllocationData allocData = {};
@@ -3165,6 +3172,9 @@ TEST_F(MockWddmMemoryManagerTest, givenAllocateGraphicsMemoryForBufferAndRequest
}
TEST_F(MockWddmMemoryManagerTest, givenDefaultMemoryManagerWhenItIsCreatedThenCorrectHugeGfxMemoryChunkIsSet) {
DebugManagerStateRestore stateRestore;
DebugManager.flags.ForcePreferredAllocationMethod.set(-1);
MockWddmMemoryManager memoryManager(executionEnvironment);
EXPECT_EQ(memoryManager.getHugeGfxMemoryChunkSize(GfxMemoryAllocationMethod::AllocateByKmd), 4 * MemoryConstants::gigaByte - MemoryConstants::pageSize64k);
EXPECT_EQ(memoryManager.getHugeGfxMemoryChunkSize(GfxMemoryAllocationMethod::UseUmdSystemPtr), 4 * MemoryConstants::gigaByte - MemoryConstants::pageSize64k);

View File

@@ -56,7 +56,7 @@ TEST_F(WddmFrontWindowPoolAllocatorTests, givenAllocateInFrontWindowPoolFlagWhen
auto gmmHelper = memManager->getGmmHelper(allocData.rootDeviceIndex);
EXPECT_EQ(allocation->getGpuBaseAddress(), gmmHelper->canonize(allocation->getGpuAddress()));
if (preferredAllocationMethod == GfxMemoryAllocationMethod::AllocateByKmd) {
if (MockWddmMemoryManager::getPreferredAllocationMethod() == GfxMemoryAllocationMethod::AllocateByKmd) {
EXPECT_TRUE(allocation->isAllocationLockable());
} else {
EXPECT_FALSE(allocation->isAllocationLockable());