Mirror of https://github.com/intel/compute-runtime.git (synced 2026-01-08 22:12:59 +08:00)
Revert "Set isLockable if size small enough for cpu memcpy"
This reverts commit 41a80072b9.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Committed by: Compute-Runtime-Automation
Parent: 8468d6c4b0
Commit: c54c3d796c
@@ -623,9 +623,14 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::preferCopyThroughLockedPtr(N
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t h2DThreshold = 0;
|
||||
size_t d2HThreshold = 0;
|
||||
NEO::GfxCoreHelper::getCpuCopyThresholds(h2DThreshold, d2HThreshold);
|
||||
size_t h2DThreshold = 2 * MemoryConstants::megaByte;
|
||||
size_t d2HThreshold = 1 * MemoryConstants::kiloByte;
|
||||
if (NEO::DebugManager.flags.ExperimentalH2DCpuCopyThreshold.get() != -1) {
|
||||
h2DThreshold = NEO::DebugManager.flags.ExperimentalH2DCpuCopyThreshold.get();
|
||||
}
|
||||
if (NEO::DebugManager.flags.ExperimentalD2HCpuCopyThreshold.get() != -1) {
|
||||
d2HThreshold = NEO::DebugManager.flags.ExperimentalD2HCpuCopyThreshold.get();
|
||||
}
|
||||
if (NEO::GfxCoreHelper::get(this->device->getHwInfo().platform.eRenderCoreFamily).copyThroughLockedPtrEnabled(this->device->getHwInfo())) {
|
||||
return (!srcFound && isSuitableUSMDeviceAlloc(dstAlloc, dstFound) && size <= h2DThreshold) ||
|
||||
(!dstFound && isSuitableUSMDeviceAlloc(srcAlloc, srcFound) && size <= d2HThreshold);
|
||||
@@ -636,8 +641,7 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::preferCopyThroughLockedPtr(N
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isSuitableUSMDeviceAlloc(NEO::SvmAllocationData *alloc, bool allocFound) {
|
||||
return allocFound && (alloc->memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY) &&
|
||||
alloc->gpuAllocations.getGraphicsAllocation(this->device->getRootDeviceIndex())->storageInfo.getNumBanks() == 1 &&
|
||||
alloc->gpuAllocations.getGraphicsAllocation(this->device->getRootDeviceIndex())->storageInfo.isLockable;
|
||||
alloc->gpuAllocations.getGraphicsAllocation(this->device->getRootDeviceIndex())->storageInfo.getNumBanks() == 1;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
||||
@@ -172,6 +172,7 @@ ze_result_t ContextImp::allocDeviceMem(ze_device_handle_t hDevice,
|
||||
unifiedMemoryProperties.allocationFlags.flags.shareable = isShareableMemory(deviceDesc->pNext, static_cast<uint32_t>(lookupTable.exportMemory), neoDevice);
|
||||
unifiedMemoryProperties.device = neoDevice;
|
||||
unifiedMemoryProperties.allocationFlags.flags.compressedHint = isAllocationSuitableForCompression(lookupTable, *device, size);
|
||||
|
||||
if (deviceDesc->flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) {
|
||||
unifiedMemoryProperties.allocationFlags.flags.locallyUncachedResource = 1;
|
||||
}
|
||||
|
||||
@@ -1883,9 +1883,8 @@ HWTEST_F(CommandListCreate, givenCommandListWhenRemoveDeallocationContainerDataT
|
||||
|
||||
struct AppendMemoryLockedCopyFixture : public DeviceFixture {
|
||||
void setUp() {
|
||||
DebugManager.flags.EnableLocalMemory.set(1);
|
||||
DebugManager.flags.ExperimentalCopyThroughLock.set(1);
|
||||
DebugManager.flags.ForceLocalMemoryAccessMode.set(0);
|
||||
DebugManager.flags.EnableLocalMemory.set(1);
|
||||
DeviceFixture::setUp();
|
||||
|
||||
nonUsmHostPtr = new char[sz];
|
||||
@@ -1901,7 +1900,7 @@ struct AppendMemoryLockedCopyFixture : public DeviceFixture {
|
||||
DebugManagerStateRestore restore;
|
||||
char *nonUsmHostPtr;
|
||||
void *devicePtr;
|
||||
size_t sz = 2 * MemoryConstants::megaByte;
|
||||
size_t sz = 4 * MemoryConstants::megaByte;
|
||||
};
|
||||
|
||||
using AppendMemoryLockedCopyTest = Test<AppendMemoryLockedCopyFixture>;
|
||||
@@ -1925,9 +1924,6 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListWhenIsSuitableUSM
|
||||
auto dstFound = device->getDriverHandle()->findAllocationDataForRange(devicePtr, 1024, &dstAllocData);
|
||||
EXPECT_FALSE(cmdList.isSuitableUSMDeviceAlloc(srcAllocData, srcFound));
|
||||
EXPECT_TRUE(cmdList.isSuitableUSMDeviceAlloc(dstAllocData, dstFound));
|
||||
|
||||
dstAllocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex())->storageInfo.isLockable = 0;
|
||||
EXPECT_FALSE(cmdList.isSuitableUSMDeviceAlloc(dstAllocData, dstFound));
|
||||
}
|
||||
|
||||
struct LocalMemoryMultiSubDeviceFixture : public SingleRootMultiSubDeviceFixture {
|
||||
@@ -2290,50 +2286,22 @@ HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndNonUsmDstHostP
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndD2HCopyWhenSizeTooLargeButFlagSetThenUseCpuMemcpy, IsAtLeastSkl) {
|
||||
constexpr size_t largeSize = 3 * MemoryConstants::megaByte;
|
||||
DebugManager.flags.ExperimentalD2HCpuCopyThreshold.set(largeSize);
|
||||
DebugManager.flags.ExperimentalD2HCpuCopyThreshold.set(2048);
|
||||
MockAppendMemoryLockedCopyTestImmediateCmdList<gfxCoreFamily> cmdList;
|
||||
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver;
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
void *deviceAlloc;
|
||||
char *hostAlloc = new char[largeSize];
|
||||
context->allocDeviceMem(device->toHandle(), &deviceDesc, largeSize, 1u, &deviceAlloc);
|
||||
cmdList.appendMemoryCopy(hostAlloc, deviceAlloc, largeSize, nullptr, 0, nullptr);
|
||||
EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 0u);
|
||||
context->freeMem(deviceAlloc);
|
||||
delete[] hostAlloc;
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndH2DCopyWhenSizeTooLargeThenUseGpuMemcpy, IsAtLeastSkl) {
|
||||
constexpr size_t largeSize = 3 * MemoryConstants::megaByte;
|
||||
MockAppendMemoryLockedCopyTestImmediateCmdList<gfxCoreFamily> cmdList;
|
||||
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver;
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
void *deviceAlloc;
|
||||
char *hostAlloc = new char[largeSize];
|
||||
context->allocDeviceMem(device->toHandle(), &deviceDesc, largeSize, 1u, &deviceAlloc);
|
||||
cmdList.appendMemoryCopy(deviceAlloc, hostAlloc, largeSize, nullptr, 0, nullptr);
|
||||
EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 1u);
|
||||
context->freeMem(deviceAlloc);
|
||||
delete[] hostAlloc;
|
||||
cmdList.appendMemoryCopy(nonUsmHostPtr, devicePtr, 2 * MemoryConstants::kiloByte, nullptr, 0, nullptr);
|
||||
EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 0u);
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndH2DCopyWhenSizeTooLargeButFlagSetThenUseCpuMemcpy, IsAtLeastSkl) {
|
||||
constexpr size_t largeSize = 3 * MemoryConstants::megaByte;
|
||||
DebugManager.flags.ExperimentalH2DCpuCopyThreshold.set(largeSize);
|
||||
DebugManager.flags.ExperimentalH2DCpuCopyThreshold.set(3 * MemoryConstants::megaByte);
|
||||
MockAppendMemoryLockedCopyTestImmediateCmdList<gfxCoreFamily> cmdList;
|
||||
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||
cmdList.csr = device->getNEODevice()->getInternalEngine().commandStreamReceiver;
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
void *deviceAlloc;
|
||||
char *hostAlloc = new char[largeSize];
|
||||
context->allocDeviceMem(device->toHandle(), &deviceDesc, largeSize, 1u, &deviceAlloc);
|
||||
cmdList.appendMemoryCopy(deviceAlloc, hostAlloc, largeSize, nullptr, 0, nullptr);
|
||||
cmdList.appendMemoryCopy(devicePtr, nonUsmHostPtr, 3 * MemoryConstants::megaByte, nullptr, 0, nullptr);
|
||||
EXPECT_EQ(cmdList.appendMemoryCopyKernelWithGACalled, 0u);
|
||||
context->freeMem(deviceAlloc);
|
||||
delete[] hostAlloc;
|
||||
}
|
||||
|
||||
HWTEST2_F(AppendMemoryLockedCopyTest, givenImmediateCommandListAndCpuMemcpyWithDependencyThenAppendBarrierCalled, IsAtLeastSkl) {
|
||||
|
||||
@@ -93,8 +93,3 @@ inline constexpr uint32_t maximalSimdSize = 32;
|
||||
inline constexpr uint32_t maximalSizeOfAtomicType = 8;
|
||||
inline constexpr uint32_t engineGroupCount = static_cast<uint32_t>(NEO::EngineGroupType::MaxEngineGroups);
|
||||
} // namespace CommonConstants
|
||||
|
||||
namespace NonUsmCpuCopyConstants {
|
||||
constexpr size_t h2DThreshold = 2 * MemoryConstants::megaByte;
|
||||
constexpr size_t d2HThreshold = 1 * MemoryConstants::kiloByte;
|
||||
} // namespace NonUsmCpuCopyConstants
|
||||
@@ -8,7 +8,6 @@
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
|
||||
#include <algorithm>
|
||||
@@ -81,16 +80,4 @@ uint32_t GfxCoreHelper::getHighestEnabledSlice(const HardwareInfo &hwInfo) {
|
||||
return highestEnabledSlice;
|
||||
}
|
||||
|
||||
void HwHelper::getCpuCopyThresholds(size_t &h2DThreshold, size_t &d2HThreshold) {
|
||||
h2DThreshold = NonUsmCpuCopyConstants::h2DThreshold;
|
||||
d2HThreshold = NonUsmCpuCopyConstants::d2HThreshold;
|
||||
|
||||
if (NEO::DebugManager.flags.ExperimentalH2DCpuCopyThreshold.get() != -1) {
|
||||
h2DThreshold = NEO::DebugManager.flags.ExperimentalH2DCpuCopyThreshold.get();
|
||||
}
|
||||
if (NEO::DebugManager.flags.ExperimentalD2HCpuCopyThreshold.get() != -1) {
|
||||
d2HThreshold = NEO::DebugManager.flags.ExperimentalD2HCpuCopyThreshold.get();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -171,8 +171,6 @@ class GfxCoreHelper {
|
||||
virtual bool isTimestampShiftRequired() const = 0;
|
||||
virtual bool isRelaxedOrderingSupported() const = 0;
|
||||
|
||||
static void getCpuCopyThresholds(size_t &h2DThreshold, size_t &d2HThreshold);
|
||||
|
||||
protected:
|
||||
GfxCoreHelper() = default;
|
||||
};
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/memory_properties_helpers.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
@@ -260,12 +259,6 @@ void *SVMAllocsManager::createUnifiedMemoryAllocation(size_t size,
|
||||
return allocationFromCache;
|
||||
}
|
||||
}
|
||||
size_t h2DThreshold = 0;
|
||||
size_t d2HThreshold = 0;
|
||||
GfxCoreHelper::getCpuCopyThresholds(h2DThreshold, d2HThreshold);
|
||||
if (size <= std::max(h2DThreshold, d2HThreshold)) {
|
||||
unifiedMemoryProperties.makeDeviceBufferLockable = 1;
|
||||
}
|
||||
} else if (memoryProperties.memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY) {
|
||||
unifiedMemoryProperties.flags.isUSMHostAllocation = true;
|
||||
}
|
||||
|
||||
@@ -1433,23 +1433,3 @@ TEST(GfxCoreHelperTests, whenIsDynamicallyPopulatedisTrueThengetHighestEnabledSl
|
||||
auto maxSlice = gfxCoreHelper.getHighestEnabledSlice(hwInfo);
|
||||
EXPECT_EQ(maxSlice, 7u);
|
||||
}
|
||||
|
||||
TEST(GfxCoreHelperTests, whenGetCpuCopyThresholdsThenCorrectValueSet) {
|
||||
size_t h2DThreshold = 0;
|
||||
size_t d2HThreshold = 0;
|
||||
GfxCoreHelper::getCpuCopyThresholds(h2DThreshold, d2HThreshold);
|
||||
EXPECT_EQ(h2DThreshold, NonUsmCpuCopyConstants::h2DThreshold);
|
||||
EXPECT_EQ(d2HThreshold, NonUsmCpuCopyConstants::d2HThreshold);
|
||||
}
|
||||
|
||||
TEST(GfxCoreHelperTests, givenThresholdChangedWhenGetCpuCopyThresholdsThenCorrectValueSet) {
|
||||
DebugManagerStateRestore restore;
|
||||
DebugManager.flags.ExperimentalH2DCpuCopyThreshold.set(2048);
|
||||
DebugManager.flags.ExperimentalD2HCpuCopyThreshold.set(2048);
|
||||
|
||||
size_t h2DThreshold = 0;
|
||||
size_t d2HThreshold = 0;
|
||||
GfxCoreHelper::getCpuCopyThresholds(h2DThreshold, d2HThreshold);
|
||||
EXPECT_EQ(h2DThreshold, 2048u);
|
||||
EXPECT_EQ(d2HThreshold, 2048u);
|
||||
}
|
||||
@@ -92,36 +92,3 @@ TEST_F(SVMLocalMemoryAllocatorTest, givenKmdMigratedSharedAllocationWhenPrefetch
|
||||
|
||||
svmManager->freeSVMAlloc(ptr);
|
||||
}
|
||||
|
||||
TEST_F(SVMLocalMemoryAllocatorTest, whenCreateUnifiedMemoryAllocationWithSmallSizeThenSetLockable) {
|
||||
DebugManagerStateRestore restore;
|
||||
DebugManager.flags.ForceLocalMemoryAccessMode.set(0);
|
||||
DebugManager.flags.EnableLocalMemory.set(1);
|
||||
std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 2));
|
||||
auto device = deviceFactory->rootDevices[0];
|
||||
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields);
|
||||
unifiedMemoryProperties.device = device;
|
||||
auto ptr = svmManager->createUnifiedMemoryAllocation(4096u, unifiedMemoryProperties);
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
EXPECT_TRUE(svmManager->getSVMAlloc(ptr)->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex)->storageInfo.isLockable);
|
||||
svmManager->freeSVMAlloc(ptr);
|
||||
}
|
||||
|
||||
TEST_F(SVMLocalMemoryAllocatorTest, whenCreateUnifiedMemoryAllocationWithLargeSizeThenSetLockable) {
|
||||
if (HwInfoConfig::get(defaultHwInfo->platform.eProductFamily)->isStorageInfoAdjustmentRequired()) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
DebugManagerStateRestore restore;
|
||||
DebugManager.flags.ForceLocalMemoryAccessMode.set(0);
|
||||
DebugManager.flags.EnableLocalMemory.set(1);
|
||||
std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 2));
|
||||
auto device = deviceFactory->rootDevices[0];
|
||||
size_t largeSize = std::max(NonUsmCpuCopyConstants::d2HThreshold, NonUsmCpuCopyConstants::h2DThreshold) + 1;
|
||||
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, rootDeviceIndices, deviceBitfields);
|
||||
unifiedMemoryProperties.device = device;
|
||||
auto ptr = svmManager->createUnifiedMemoryAllocation(largeSize, unifiedMemoryProperties);
|
||||
EXPECT_NE(nullptr, ptr);
|
||||
|
||||
EXPECT_FALSE(svmManager->getSVMAlloc(ptr)->gpuAllocations.getGraphicsAllocation(mockRootDeviceIndex)->storageInfo.isLockable);
|
||||
svmManager->freeSVMAlloc(ptr);
|
||||
}
|
||||
Reference in New Issue
Block a user